blob: c2611b4c6fb275b5d3fd0feaed0dbd604441b182 [file] [log] [blame]
/*
 * NEON Convolution
 * Copyright (C) 2012, 2013 Thomas Tsou <tom@tsou.cc>
 *
 * SPDX-License-Identifier: LGPL-2.1+
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 */
17
18#include <malloc.h>
19#include <string.h>
20#include <stdio.h>
21
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
25
/*
 * Forward declarations from base implementation.
 *
 * Only the prototypes are visible here; the definitions live outside this
 * part of the file. The API wrappers below fall back to the _base_*
 * routines when no NEON kernel has been selected, and use bounds_check()
 * to validate their arguments (a negative result means "rejected").
 */
int _base_convolve_real(float *x, int x_len,
			float *h, int h_len,
			float *y, int y_len,
			int start, int len);

int _base_convolve_complex(float *x, int x_len,
			   float *h, int h_len,
			   float *y, int y_len,
			   int start, int len);

int bounds_check(int x_len, int h_len, int y_len,
		 int start, int len);
Thomas Tsou7e4e5362013-10-30 21:18:55 -040039
#ifdef HAVE_NEON
/* Calls into NEON assembler */
void neon_conv_real4(float *x, float *h, float *y, int len);
void neon_conv_real8(float *x, float *h, float *y, int len);
void neon_conv_real12(float *x, float *h, float *y, int len);
void neon_conv_real16(float *x, float *h, float *y, int len);
void neon_conv_real20(float *x, float *h, float *y, int len);
void mac_cx_neon4(float *x, float *h, float *y, int len);

/*
 * Complex-complex convolution for filter lengths that are a multiple of 4.
 *
 * Produces 'len' output samples. For output sample i the assembler routine
 * mac_cx_neon4() is invoked on the input window starting at x[2 * i] and
 * writes to y[2 * i]; both buffers are stepped two floats per sample.
 *
 * x      first input float of the window belonging to output sample 0
 * h      filter taps, passed through unchanged on every call
 * y      output buffer
 * h_len  number of taps; h_len / 4 is handed to mac_cx_neon4()
 * len    number of output samples to compute
 */
static void neon_conv_cmplx_4n(float *x, float *h, float *y, int h_len, int len)
{
	/* Loop-invariant: tap count expressed in groups of four. */
	const int quads = h_len >> 2;

	for (int i = 0; i < len; i++)
		mac_cx_neon4(&x[2 * i], h, &y[2 * i], quads);
}
#endif
56
/* API: Initialize convolve module */
void convolve_init(void)
{
	/* Stub: this implementation has no state to set up. */
}
63
/*
 * API: Aligned complex-real
 *
 * Validates the arguments with bounds_check(), clears the first 'len'
 * output samples (two floats each) of y, then runs the convolution.
 * When built with HAVE_NEON and h_len is 4, 8, 12, 16 or 20 the matching
 * NEON assembler kernel is used; every other case is delegated to
 * _base_convolve_real().
 *
 * x, x_len  input buffer and its length
 * h, h_len  filter taps and their count
 * y, y_len  output buffer and its length
 * start     index of the first input sample to process
 * len       number of output samples to compute
 *
 * Returns len on success, -1 when bounds_check() rejects the arguments,
 * or the negative value reported by _base_convolve_real().
 */
int convolve_real(float *x, int x_len,
		  float *h, int h_len,
		  float *y, int y_len,
		  int start, int len)
{
	void (*conv_func)(float *, float *, float *, int) = NULL;
	int rc;

	if (bounds_check(x_len, h_len, y_len, start, len) < 0)
		return -1;

	/* Widen before multiplying so the byte count is computed in size_t. */
	memset(y, 0, (size_t)len * 2 * sizeof(float));

#ifdef HAVE_NEON
	switch (h_len) {
	case 4:
		conv_func = neon_conv_real4;
		break;
	case 8:
		conv_func = neon_conv_real8;
		break;
	case 12:
		conv_func = neon_conv_real12;
		break;
	case 16:
		conv_func = neon_conv_real16;
		break;
	case 20:
		conv_func = neon_conv_real20;
		break;
	default:
		/* No dedicated kernel for this tap count: use the base path. */
		break;
	}
#endif
	if (conv_func) {
		/*
		 * The kernel is handed the input rewound by (h_len - 1)
		 * samples from 'start', so that history must be readable;
		 * presumably bounds_check() guarantees it - verify there.
		 */
		conv_func(&x[2 * (-(h_len - 1) + start)],
			  h, y, len);
		return len;
	}

	rc = _base_convolve_real(x, x_len,
				 h, h_len,
				 y, y_len,
				 start, len);
	/*
	 * Do not swallow a failure from the base implementation. A negative
	 * value is assumed to mean "error", matching the bounds_check()
	 * convention used above.
	 */
	if (rc < 0)
		return rc;

	return len;
}
108
109
/*
 * API: Aligned complex-complex
 *
 * Validates the arguments with bounds_check(), clears the first 'len'
 * output samples (two floats each) of y, then runs the convolution.
 * When built with HAVE_NEON and h_len is a multiple of 4 the NEON path
 * neon_conv_cmplx_4n() is used; every other case is delegated to
 * _base_convolve_complex().
 *
 * x, x_len  input buffer and its length
 * h, h_len  filter taps and their count
 * y, y_len  output buffer and its length
 * start     index of the first input sample to process
 * len       number of output samples to compute
 *
 * Returns len on success, -1 when bounds_check() rejects the arguments,
 * or the negative value reported by _base_convolve_complex().
 */
int convolve_complex(float *x, int x_len,
		     float *h, int h_len,
		     float *y, int y_len,
		     int start, int len)
{
	void (*conv_func)(float *, float *, float *, int, int) = NULL;
	int rc;

	if (bounds_check(x_len, h_len, y_len, start, len) < 0)
		return -1;

	/* Widen before multiplying so the byte count is computed in size_t. */
	memset(y, 0, (size_t)len * 2 * sizeof(float));

#ifdef HAVE_NEON
	if (!(h_len % 4))
		conv_func = neon_conv_cmplx_4n;
#endif
	if (conv_func) {
		/*
		 * The kernel is handed the input rewound by (h_len - 1)
		 * samples from 'start', so that history must be readable;
		 * presumably bounds_check() guarantees it - verify there.
		 */
		conv_func(&x[2 * (-(h_len - 1) + start)],
			  h, y, h_len, len);
		return len;
	}

	rc = _base_convolve_complex(x, x_len,
				    h, h_len,
				    y, y_len,
				    start, len);
	/*
	 * Do not swallow a failure from the base implementation. A negative
	 * value is assumed to mean "error", matching the bounds_check()
	 * convention used above.
	 */
	if (rc < 0)
		return rc;

	return len;
}