blob: eb38f64b87a5e4118609117b61b17d4792130509 [file] [log] [blame]
Thomas Tsou3eaae802013-08-20 19:31:14 -04001/*
2 * SSE Convolution
3 * Copyright (C) 2012, 2013 Thomas Tsou <tom@tsou.cc>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20#include <malloc.h>
21#include <string.h>
22#include <stdio.h>
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040023#include "convolve.h"
Philipp Maiere8ae9fc2017-03-20 12:08:42 +010024#include "convolve_sse_3.h"
Thomas Tsou3eaae802013-08-20 19:31:14 -040025
26#ifdef HAVE_CONFIG_H
27#include "config.h"
28#endif
29
/*
 * Architecture dependent function pointers.
 *
 * Every slot has the same signature; the argument order matches the
 * _base_convolve_*() prototypes declared in this file:
 * (x, x_len, h, h_len, y, y_len, start, len, step, offset).
 */
typedef void (*convolve_kernel_fn)(const float *, int, const float *, int,
				   float *, int, int, int, int, int);

struct convolve_cpu_context {
	/* Slots used by convolve_complex() */
	convolve_kernel_fn conv_cmplx_4n;	/* step <= 4, h_len % 4 == 0 */
	convolve_kernel_fn conv_cmplx_8n;	/* step <= 4, h_len % 8 == 0 */
	convolve_kernel_fn conv_cmplx;		/* every other case */

	/* Slots used by convolve_real() */
	convolve_kernel_fn conv_real4;		/* step <= 4, h_len == 4 */
	convolve_kernel_fn conv_real8;		/* step <= 4, h_len == 8 */
	convolve_kernel_fn conv_real12;		/* step <= 4, h_len == 12 */
	convolve_kernel_fn conv_real16;		/* step <= 4, h_len == 16 */
	convolve_kernel_fn conv_real20;		/* step <= 4, h_len == 20 */
	convolve_kernel_fn conv_real4n;		/* step <= 4, other h_len % 4 == 0 */
	convolve_kernel_fn conv_real;		/* every other case */
};

/*
 * File-local dispatch table. It has static storage and no initializer, so
 * every slot is a null pointer until convolve_init() has filled it in.
 */
static struct convolve_cpu_context c;
54
/*
 * Forward declarations from base implementation.
 *
 * convolve_init() installs the two _base_convolve_*() routines as the
 * default kernels in the dispatch table.
 */
int _base_convolve_real(const float *x, int x_len, const float *h, int h_len,
			float *y, int y_len, int start, int len, int step,
			int offset);

int _base_convolve_complex(const float *x, int x_len, const float *h,
			   int h_len, float *y, int y_len, int start, int len,
			   int step, int offset);

/*
 * Argument validation shared by the convolve_*() entry points; they give up
 * and return -1 whenever this returns a negative value.
 */
int bounds_check(int x_len, int h_len, int y_len, int start, int len,
		 int step);
70
Philipp Maier7e07cf22017-03-15 18:09:35 +010071/* API: Initalize convolve module */
72void convolve_init(void)
73{
74 c.conv_cmplx_4n = (void *)_base_convolve_complex;
75 c.conv_cmplx_8n = (void *)_base_convolve_complex;
76 c.conv_cmplx = (void *)_base_convolve_complex;
77 c.conv_real4 = (void *)_base_convolve_real;
78 c.conv_real8 = (void *)_base_convolve_real;
79 c.conv_real12 = (void *)_base_convolve_real;
80 c.conv_real16 = (void *)_base_convolve_real;
81 c.conv_real20 = (void *)_base_convolve_real;
82 c.conv_real4n = (void *)_base_convolve_real;
83 c.conv_real = (void *)_base_convolve_real;
84
Vadim Yanitskiy3bd763d2017-05-20 01:46:51 +030085#if defined(HAVE_SSE3) && defined(HAVE___BUILTIN_CPU_SUPPORTS)
Philipp Maier7e07cf22017-03-15 18:09:35 +010086 if (__builtin_cpu_supports("sse3")) {
87 c.conv_cmplx_4n = sse_conv_cmplx_4n;
88 c.conv_cmplx_8n = sse_conv_cmplx_8n;
89 c.conv_real4 = sse_conv_real4;
90 c.conv_real8 = sse_conv_real8;
91 c.conv_real12 = sse_conv_real12;
92 c.conv_real16 = sse_conv_real16;
93 c.conv_real20 = sse_conv_real20;
94 c.conv_real4n = sse_conv_real4n;
95 }
96#endif
97}
98
Thomas Tsou3eaae802013-08-20 19:31:14 -040099/* API: Aligned complex-real */
Tom Tsouf147b172015-03-25 12:55:11 -0700100int convolve_real(const float *x, int x_len,
101 const float *h, int h_len,
Philipp Maier131f82b2017-03-15 12:39:25 +0100102 float *y, int y_len, int start, int len, int step, int offset)
Thomas Tsou3eaae802013-08-20 19:31:14 -0400103{
Thomas Tsou3eaae802013-08-20 19:31:14 -0400104 if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
105 return -1;
106
107 memset(y, 0, len * 2 * sizeof(float));
108
Thomas Tsou3eaae802013-08-20 19:31:14 -0400109 if (step <= 4) {
110 switch (h_len) {
111 case 4:
Philipp Maier7e07cf22017-03-15 18:09:35 +0100112 c.conv_real4(x, x_len, h, h_len, y, y_len, start, len,
113 step, offset);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400114 break;
115 case 8:
Philipp Maier7e07cf22017-03-15 18:09:35 +0100116 c.conv_real8(x, x_len, h, h_len, y, y_len, start, len,
117 step, offset);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400118 break;
119 case 12:
Philipp Maier7e07cf22017-03-15 18:09:35 +0100120 c.conv_real12(x, x_len, h, h_len, y, y_len, start, len,
121 step, offset);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400122 break;
123 case 16:
Philipp Maier7e07cf22017-03-15 18:09:35 +0100124 c.conv_real16(x, x_len, h, h_len, y, y_len, start, len,
125 step, offset);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400126 break;
127 case 20:
Philipp Maier7e07cf22017-03-15 18:09:35 +0100128 c.conv_real20(x, x_len, h, h_len, y, y_len, start, len,
129 step, offset);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400130 break;
131 default:
132 if (!(h_len % 4))
Philipp Maier7e07cf22017-03-15 18:09:35 +0100133 c.conv_real4n(x, x_len, h, h_len, y, y_len,
134 start, len, step, offset);
135 else
136 c.conv_real(x, x_len, h, h_len, y, y_len, start,
137 len, step, offset);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400138 }
Philipp Maier7e07cf22017-03-15 18:09:35 +0100139 } else
140 c.conv_real(x, x_len, h, h_len, y, y_len, start, len, step,
141 offset);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400142
143 return len;
144}
145
146/* API: Aligned complex-complex */
Tom Tsouf147b172015-03-25 12:55:11 -0700147int convolve_complex(const float *x, int x_len,
148 const float *h, int h_len,
Thomas Tsou3eaae802013-08-20 19:31:14 -0400149 float *y, int y_len,
Philipp Maier131f82b2017-03-15 12:39:25 +0100150 int start, int len, int step, int offset)
Thomas Tsou3eaae802013-08-20 19:31:14 -0400151{
Thomas Tsou3eaae802013-08-20 19:31:14 -0400152 if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
153 return -1;
154
155 memset(y, 0, len * 2 * sizeof(float));
156
Thomas Tsou3eaae802013-08-20 19:31:14 -0400157 if (step <= 4) {
158 if (!(h_len % 8))
Philipp Maier7e07cf22017-03-15 18:09:35 +0100159 c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start,
160 len, step, offset);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400161 else if (!(h_len % 4))
Philipp Maier7e07cf22017-03-15 18:09:35 +0100162 c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start,
163 len, step, offset);
164 else
165 c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len,
166 step, offset);
167 } else
168 c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len, step,
169 offset);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400170
171 return len;
172}