blob: 3765c5c9b57ef9f002a5c76b0ab01cee2cdf470d [file] [log] [blame]
Thomas Tsou17bbb9b2013-10-30 21:24:40 -04001/*
2 * Convolution
3 * Copyright (C) 2012, 2013 Thomas Tsou <tom@tsou.cc>
4 *
Pau Espin Pedrol21d03d32019-07-22 12:05:52 +02005 * SPDX-License-Identifier: LGPL-2.1+
6 *
Thomas Tsou17bbb9b2013-10-30 21:24:40 -04007 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040016 */
17
18#include <malloc.h>
19#include <string.h>
20#include <stdio.h>
21
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
25
26/* Base multiply and accumulate complex-real */
Tom Tsouf147b172015-03-25 12:55:11 -070027static void mac_real(const float *x, const float *h, float *y)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040028{
29 y[0] += x[0] * h[0];
30 y[1] += x[1] * h[0];
31}
32
33/* Base multiply and accumulate complex-complex */
Tom Tsouf147b172015-03-25 12:55:11 -070034static void mac_cmplx(const float *x, const float *h, float *y)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040035{
36 y[0] += x[0] * h[0] - x[1] * h[1];
37 y[1] += x[0] * h[1] + x[1] * h[0];
38}
39
40/* Base vector complex-complex multiply and accumulate */
Tom Tsouf147b172015-03-25 12:55:11 -070041static void mac_real_vec_n(const float *x, const float *h, float *y,
Sylvain Munauta3934a12018-12-20 19:10:26 +010042 int len)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040043{
Sylvain Munauta3934a12018-12-20 19:10:26 +010044 for (int i=0; i<len; i++)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040045 mac_real(&x[2 * i], &h[2 * i], y);
46}
47
48/* Base vector complex-complex multiply and accumulate */
Tom Tsouf147b172015-03-25 12:55:11 -070049static void mac_cmplx_vec_n(const float *x, const float *h, float *y,
Sylvain Munauta3934a12018-12-20 19:10:26 +010050 int len)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040051{
Sylvain Munauta3934a12018-12-20 19:10:26 +010052 for (int i=0; i<len; i++)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040053 mac_cmplx(&x[2 * i], &h[2 * i], y);
54}
55
56/* Base complex-real convolution */
Tom Tsouf147b172015-03-25 12:55:11 -070057int _base_convolve_real(const float *x, int x_len,
58 const float *h, int h_len,
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040059 float *y, int y_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +010060 int start, int len)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040061{
62 for (int i = 0; i < len; i++) {
63 mac_real_vec_n(&x[2 * (i - (h_len - 1) + start)],
64 h,
Sylvain Munauta3934a12018-12-20 19:10:26 +010065 &y[2 * i], h_len);
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040066 }
67
68 return len;
69}
70
71/* Base complex-complex convolution */
Tom Tsouf147b172015-03-25 12:55:11 -070072int _base_convolve_complex(const float *x, int x_len,
73 const float *h, int h_len,
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040074 float *y, int y_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +010075 int start, int len)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040076{
77 for (int i = 0; i < len; i++) {
78 mac_cmplx_vec_n(&x[2 * (i - (h_len - 1) + start)],
79 h,
80 &y[2 * i],
Sylvain Munauta3934a12018-12-20 19:10:26 +010081 h_len);
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040082 }
83
84 return len;
85}
86
87/* Buffer validity checks */
88int bounds_check(int x_len, int h_len, int y_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +010089 int start, int len)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040090{
91 if ((x_len < 1) || (h_len < 1) ||
Sylvain Munauta3934a12018-12-20 19:10:26 +010092 (y_len < 1) || (len < 1)) {
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040093 fprintf(stderr, "Convolve: Invalid input\n");
94 return -1;
95 }
96
97 if ((start + len > x_len) || (len > y_len) || (x_len < h_len)) {
98 fprintf(stderr, "Convolve: Boundary exception\n");
99 fprintf(stderr, "start: %i, len: %i, x: %i, h: %i, y: %i\n",
100 start, len, x_len, h_len, y_len);
101 return -1;
102 }
103
104 return 0;
105}
106
107/* API: Non-aligned (no SSE) complex-real */
Tom Tsouf147b172015-03-25 12:55:11 -0700108int base_convolve_real(const float *x, int x_len,
109 const float *h, int h_len,
Thomas Tsou17bbb9b2013-10-30 21:24:40 -0400110 float *y, int y_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +0100111 int start, int len)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -0400112{
Sylvain Munauta3934a12018-12-20 19:10:26 +0100113 if (bounds_check(x_len, h_len, y_len, start, len) < 0)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -0400114 return -1;
115
116 memset(y, 0, len * 2 * sizeof(float));
117
118 return _base_convolve_real(x, x_len,
119 h, h_len,
120 y, y_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +0100121 start, len);
Thomas Tsou17bbb9b2013-10-30 21:24:40 -0400122}
123
124/* API: Non-aligned (no SSE) complex-complex */
Tom Tsouf147b172015-03-25 12:55:11 -0700125int base_convolve_complex(const float *x, int x_len,
126 const float *h, int h_len,
Thomas Tsou17bbb9b2013-10-30 21:24:40 -0400127 float *y, int y_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +0100128 int start, int len)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -0400129{
Sylvain Munauta3934a12018-12-20 19:10:26 +0100130 if (bounds_check(x_len, h_len, y_len, start, len) < 0)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -0400131 return -1;
132
133 memset(y, 0, len * 2 * sizeof(float));
134
135 return _base_convolve_complex(x, x_len,
136 h, h_len,
137 y, y_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +0100138 start, len);
Thomas Tsou17bbb9b2013-10-30 21:24:40 -0400139}
140
141/* Aligned filter tap allocation */
Pau Espin Pedrolf7331762018-12-03 17:46:04 +0100142void *convolve_h_alloc(size_t len)
Thomas Tsou17bbb9b2013-10-30 21:24:40 -0400143{
144#ifdef HAVE_SSE3
145 return memalign(16, len * 2 * sizeof(float));
146#else
147 return malloc(len * 2 * sizeof(float));
148#endif
149}