blob: 45a3719cf65b14d62e431038b9ad2d3af76fd01a [file] [log] [blame]
Thomas Tsou3eaae802013-08-20 19:31:14 -04001/*
2 * SSE Convolution
3 * Copyright (C) 2012, 2013 Thomas Tsou <tom@tsou.cc>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
Thomas Tsou3eaae802013-08-20 19:31:14 -040014 */
15
16#include <malloc.h>
17#include <string.h>
18#include <stdio.h>
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040019#include "convolve.h"
Philipp Maiere8ae9fc2017-03-20 12:08:42 +010020#include "convolve_sse_3.h"
Thomas Tsou3eaae802013-08-20 19:31:14 -040021
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
25
/*
 * Common signature shared by every convolution kernel slot:
 * (x, x_len, h, h_len, y, y_len, start, len).
 */
typedef void (*convolve_kernel_fn)(const float *, int, const float *, int,
				   float *, int, int, int);

/*
 * Architecture dependent function pointers.
 *
 * One slot per specialised kernel; the slots are filled in by
 * convolve_init() and consulted by the public convolve_*() entry points.
 */
struct convolve_cpu_context {
	/* complex taps */
	convolve_kernel_fn conv_cmplx_4n;	/* h_len multiple of 4 */
	convolve_kernel_fn conv_cmplx_8n;	/* h_len multiple of 8 */
	convolve_kernel_fn conv_cmplx;		/* any other h_len */

	/* real taps */
	convolve_kernel_fn conv_real4;		/* h_len == 4 */
	convolve_kernel_fn conv_real8;		/* h_len == 8 */
	convolve_kernel_fn conv_real12;		/* h_len == 12 */
	convolve_kernel_fn conv_real16;		/* h_len == 16 */
	convolve_kernel_fn conv_real20;		/* h_len == 20 */
	convolve_kernel_fn conv_real4n;		/* other multiples of 4 */
	convolve_kernel_fn conv_real;		/* any other h_len */
};

/* Module-wide dispatch table (zero-initialised until convolve_init()). */
static struct convolve_cpu_context c;
50
/*
 * Forward declarations from base implementation.
 *
 * These symbols are defined outside this file; only their prototypes
 * are needed here.
 */

/* Generic kernel used for real-valued taps */
int _base_convolve_real(const float *x, int x_len, const float *h, int h_len,
			float *y, int y_len, int start, int len);

/* Generic kernel used for complex-valued taps */
int _base_convolve_complex(const float *x, int x_len, const float *h, int h_len,
			   float *y, int y_len, int start, int len);

/* Argument validation; callers in this file treat a negative return as failure */
int bounds_check(int x_len, int h_len, int y_len, int start, int len);
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040064
Martin Hauke066fd042019-10-13 19:08:00 +020065/* API: Initialize convolve module */
Philipp Maier7e07cf22017-03-15 18:09:35 +010066void convolve_init(void)
67{
68 c.conv_cmplx_4n = (void *)_base_convolve_complex;
69 c.conv_cmplx_8n = (void *)_base_convolve_complex;
70 c.conv_cmplx = (void *)_base_convolve_complex;
71 c.conv_real4 = (void *)_base_convolve_real;
72 c.conv_real8 = (void *)_base_convolve_real;
73 c.conv_real12 = (void *)_base_convolve_real;
74 c.conv_real16 = (void *)_base_convolve_real;
75 c.conv_real20 = (void *)_base_convolve_real;
76 c.conv_real4n = (void *)_base_convolve_real;
77 c.conv_real = (void *)_base_convolve_real;
78
Vadim Yanitskiy3bd763d2017-05-20 01:46:51 +030079#if defined(HAVE_SSE3) && defined(HAVE___BUILTIN_CPU_SUPPORTS)
Philipp Maier7e07cf22017-03-15 18:09:35 +010080 if (__builtin_cpu_supports("sse3")) {
81 c.conv_cmplx_4n = sse_conv_cmplx_4n;
82 c.conv_cmplx_8n = sse_conv_cmplx_8n;
83 c.conv_real4 = sse_conv_real4;
84 c.conv_real8 = sse_conv_real8;
85 c.conv_real12 = sse_conv_real12;
86 c.conv_real16 = sse_conv_real16;
87 c.conv_real20 = sse_conv_real20;
88 c.conv_real4n = sse_conv_real4n;
89 }
90#endif
91}
92
Thomas Tsou3eaae802013-08-20 19:31:14 -040093/* API: Aligned complex-real */
Tom Tsouf147b172015-03-25 12:55:11 -070094int convolve_real(const float *x, int x_len,
95 const float *h, int h_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +010096 float *y, int y_len, int start, int len)
Thomas Tsou3eaae802013-08-20 19:31:14 -040097{
Eric7a52e422020-08-14 03:11:22 +020098#ifndef __OPTIMIZE__
Sylvain Munauta3934a12018-12-20 19:10:26 +010099 if (bounds_check(x_len, h_len, y_len, start, len) < 0)
Thomas Tsou3eaae802013-08-20 19:31:14 -0400100 return -1;
Eric7a52e422020-08-14 03:11:22 +0200101#endif
Pau Espin Pedrol0fbdfef2020-09-18 20:11:14 +0200102 memset(y, 0, len * 2 * sizeof(float));
103
Sylvain Munauta3934a12018-12-20 19:10:26 +0100104 switch (h_len) {
105 case 4:
106 c.conv_real4(x, x_len, h, h_len, y, y_len, start, len);
107 break;
108 case 8:
109 c.conv_real8(x, x_len, h, h_len, y, y_len, start, len);
110 break;
111 case 12:
112 c.conv_real12(x, x_len, h, h_len, y, y_len, start, len);
113 break;
114 case 16:
115 c.conv_real16(x, x_len, h, h_len, y, y_len, start, len);
116 break;
117 case 20:
118 c.conv_real20(x, x_len, h, h_len, y, y_len, start, len);
119 break;
120 default:
121 if (!(h_len % 4))
122 c.conv_real4n(x, x_len, h, h_len, y, y_len,
123 start, len);
124 else
125 c.conv_real(x, x_len, h, h_len, y, y_len, start,
126 len);
127 }
Thomas Tsou3eaae802013-08-20 19:31:14 -0400128
129 return len;
130}
131
132/* API: Aligned complex-complex */
Tom Tsouf147b172015-03-25 12:55:11 -0700133int convolve_complex(const float *x, int x_len,
134 const float *h, int h_len,
Thomas Tsou3eaae802013-08-20 19:31:14 -0400135 float *y, int y_len,
Sylvain Munauta3934a12018-12-20 19:10:26 +0100136 int start, int len)
Thomas Tsou3eaae802013-08-20 19:31:14 -0400137{
Eric7a52e422020-08-14 03:11:22 +0200138#ifndef __OPTIMIZE__
Sylvain Munauta3934a12018-12-20 19:10:26 +0100139 if (bounds_check(x_len, h_len, y_len, start, len) < 0)
Thomas Tsou3eaae802013-08-20 19:31:14 -0400140 return -1;
Eric7a52e422020-08-14 03:11:22 +0200141#endif
Pau Espin Pedrol0fbdfef2020-09-18 20:11:14 +0200142 memset(y, 0, len * 2 * sizeof(float));
143
Sylvain Munauta3934a12018-12-20 19:10:26 +0100144 if (!(h_len % 8))
145 c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start, len);
146 else if (!(h_len % 4))
147 c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start, len);
148 else
149 c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len);
Thomas Tsou3eaae802013-08-20 19:31:14 -0400150
151 return len;
152}