blob: be6272198f4a7b5ffdfbde653c727c4c6c31c9d6 [file] [log] [blame]
Thomas Tsou3eaae802013-08-20 19:31:14 -04001/*
2 * SSE Convolution
3 * Copyright (C) 2012, 2013 Thomas Tsou <tom@tsou.cc>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20#include <malloc.h>
21#include <string.h>
22#include <stdio.h>
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040023#include "convolve.h"
Philipp Maiere8ae9fc2017-03-20 12:08:42 +010024#include "convolve_sse_3.h"
Thomas Tsou3eaae802013-08-20 19:31:14 -040025
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
/*
 * Architecture dependent function pointers.
 *
 * All kernels share a single signature, so it is spelled out once here.
 * The parameter names mirror those of the public convolve_real() and
 * convolve_complex() entry points, which forward their arguments unchanged.
 */
typedef void (*conv_kernel_fn)(const float *x, int x_len,
			       const float *h, int h_len,
			       float *y, int y_len,
			       int start, int len);

struct convolve_cpu_context {
	/* Complex-complex: h_len a multiple of 4 / of 8 / anything else */
	conv_kernel_fn conv_cmplx_4n;
	conv_kernel_fn conv_cmplx_8n;
	conv_kernel_fn conv_cmplx;

	/* Complex-real: one slot per fixed h_len (4, 8, 12, 16, 20) */
	conv_kernel_fn conv_real4;
	conv_kernel_fn conv_real8;
	conv_kernel_fn conv_real12;
	conv_kernel_fn conv_real16;
	conv_kernel_fn conv_real20;

	/* Complex-real: any other multiple of 4, then the catch-all */
	conv_kernel_fn conv_real4n;
	conv_kernel_fn conv_real;
};

/* Dispatch table of this file; its slots are assigned by convolve_init() */
static struct convolve_cpu_context c;
54
/*
 * Forward declarations from base implementation.
 *
 * Only the prototypes are repeated here; the definitions live outside this
 * file. The callers in this file treat a negative bounds_check() result as
 * "reject the request".
 */
extern int _base_convolve_real(const float *x, int x_len,
			       const float *h, int h_len,
			       float *y, int y_len,
			       int start, int len);

extern int _base_convolve_complex(const float *x, int x_len,
				  const float *h, int h_len,
				  float *y, int y_len,
				  int start, int len);

extern int bounds_check(int x_len, int h_len, int y_len, int start, int len);
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040068
Martin Hauke066fd042019-10-13 19:08:00 +020069/* API: Initialize convolve module */
Philipp Maier7e07cf22017-03-15 18:09:35 +010070void convolve_init(void)
71{
72 c.conv_cmplx_4n = (void *)_base_convolve_complex;
73 c.conv_cmplx_8n = (void *)_base_convolve_complex;
74 c.conv_cmplx = (void *)_base_convolve_complex;
75 c.conv_real4 = (void *)_base_convolve_real;
76 c.conv_real8 = (void *)_base_convolve_real;
77 c.conv_real12 = (void *)_base_convolve_real;
78 c.conv_real16 = (void *)_base_convolve_real;
79 c.conv_real20 = (void *)_base_convolve_real;
80 c.conv_real4n = (void *)_base_convolve_real;
81 c.conv_real = (void *)_base_convolve_real;
82
Vadim Yanitskiy3bd763d2017-05-20 01:46:51 +030083#if defined(HAVE_SSE3) && defined(HAVE___BUILTIN_CPU_SUPPORTS)
Philipp Maier7e07cf22017-03-15 18:09:35 +010084 if (__builtin_cpu_supports("sse3")) {
85 c.conv_cmplx_4n = sse_conv_cmplx_4n;
86 c.conv_cmplx_8n = sse_conv_cmplx_8n;
87 c.conv_real4 = sse_conv_real4;
88 c.conv_real8 = sse_conv_real8;
89 c.conv_real12 = sse_conv_real12;
90 c.conv_real16 = sse_conv_real16;
91 c.conv_real20 = sse_conv_real20;
92 c.conv_real4n = sse_conv_real4n;
93 }
94#endif
95}
96
Thomas Tsou3eaae802013-08-20 19:31:14 -040097/* API: Aligned complex-real */
Tom Tsouf147b172015-03-25 12:55:11 -070098int convolve_real(const float *x, int x_len,
99 const float *h, int h_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +0100100 float *y, int y_len, int start, int len)
Thomas Tsou3eaae802013-08-20 19:31:14 -0400101{
Eric7a52e422020-08-14 03:11:22 +0200102#ifndef __OPTIMIZE__
Sylvain Munauta3934a12018-12-20 19:10:26 +0100103 if (bounds_check(x_len, h_len, y_len, start, len) < 0)
Thomas Tsou3eaae802013-08-20 19:31:14 -0400104 return -1;
Eric7a52e422020-08-14 03:11:22 +0200105#endif
Pau Espin Pedrol0fbdfef2020-09-18 20:11:14 +0200106 memset(y, 0, len * 2 * sizeof(float));
107
Sylvain Munauta3934a12018-12-20 19:10:26 +0100108 switch (h_len) {
109 case 4:
110 c.conv_real4(x, x_len, h, h_len, y, y_len, start, len);
111 break;
112 case 8:
113 c.conv_real8(x, x_len, h, h_len, y, y_len, start, len);
114 break;
115 case 12:
116 c.conv_real12(x, x_len, h, h_len, y, y_len, start, len);
117 break;
118 case 16:
119 c.conv_real16(x, x_len, h, h_len, y, y_len, start, len);
120 break;
121 case 20:
122 c.conv_real20(x, x_len, h, h_len, y, y_len, start, len);
123 break;
124 default:
125 if (!(h_len % 4))
126 c.conv_real4n(x, x_len, h, h_len, y, y_len,
127 start, len);
128 else
129 c.conv_real(x, x_len, h, h_len, y, y_len, start,
130 len);
131 }
Thomas Tsou3eaae802013-08-20 19:31:14 -0400132
133 return len;
134}
135
136/* API: Aligned complex-complex */
Tom Tsouf147b172015-03-25 12:55:11 -0700137int convolve_complex(const float *x, int x_len,
138 const float *h, int h_len,
Thomas Tsou3eaae802013-08-20 19:31:14 -0400139 float *y, int y_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +0100140 int start, int len)
Thomas Tsou3eaae802013-08-20 19:31:14 -0400141{
Eric7a52e422020-08-14 03:11:22 +0200142#ifndef __OPTIMIZE__
Sylvain Munauta3934a12018-12-20 19:10:26 +0100143 if (bounds_check(x_len, h_len, y_len, start, len) < 0)
Thomas Tsou3eaae802013-08-20 19:31:14 -0400144 return -1;
Eric7a52e422020-08-14 03:11:22 +0200145#endif
Pau Espin Pedrol0fbdfef2020-09-18 20:11:14 +0200146 memset(y, 0, len * 2 * sizeof(float));
147
Sylvain Munauta3934a12018-12-20 19:10:26 +0100148 if (!(h_len % 8))
149 c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start, len);
150 else if (!(h_len % 4))
151 c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start, len);
152 else
153 c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400154
155 return len;
156}