blob: 912d0c2917116320b04f4cff199d9c69fd5c9707 [file] [log] [blame]
Thomas Tsou7e4e5362013-10-30 21:18:55 -04001/*
2 * NEON Convolution
3 * Copyright (C) 2012, 2013 Thomas Tsou <tom@tsou.cc>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20#include <malloc.h>
21#include <string.h>
22#include <stdio.h>
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
/*
 * Prototypes of routines supplied by the base implementation.
 *
 * _base_convolve_real() and _base_convolve_complex() are the generic
 * fallbacks used by convolve_real() and convolve_complex() below whenever
 * no NEON kernel has been selected.  bounds_check() is called first by both
 * API functions; a negative return value from it is treated as failure.
 */
int _base_convolve_real(float *x, int x_len, float *h, int h_len,
			float *y, int y_len, int start, int len,
			int step, int offset);

int _base_convolve_complex(float *x, int x_len, float *h, int h_len,
			   float *y, int y_len, int start, int len,
			   int step, int offset);

int bounds_check(int x_len, int h_len, int y_len,
		 int start, int len, int step);
43
44#ifdef HAVE_NEON
45/* Calls into NEON assembler */
46void neon_conv_real4(float *x, float *h, float *y, int len);
47void neon_conv_real8(float *x, float *h, float *y, int len);
48void neon_conv_real12(float *x, float *h, float *y, int len);
49void neon_conv_real16(float *x, float *h, float *y, int len);
50void neon_conv_real20(float *x, float *h, float *y, int len);
51void mac_cx_neon4(float *x, float *h, float *y, int len);
52
/*
 * Complex-complex convolution for filter lengths that are a multiple of 4.
 *
 * Calls mac_cx_neon4() once for each of the `len` output samples.  Input and
 * output both advance by two floats per sample (presumably one interleaved
 * I/Q pair - confirm against the assembler routine), while the filter `h`
 * is passed unchanged together with the number of 4-tap groups (h_len / 4).
 */
static void neon_conv_cmplx_4n(float *x, float *h, float *y, int h_len, int len)
{
	/* Loop-invariant: how many groups of four taps the filter holds */
	const int quads = h_len >> 2;
	int sample = 0;

	while (sample < len) {
		float *in = x + 2 * sample;
		float *out = y + 2 * sample;

		mac_cx_neon4(in, h, out, quads);
		sample++;
	}
}
59#endif
60
/* API: Initialize convolve module */
void convolve_init(void)
{
	/* Intentionally empty: this implementation has no state to set up. */
}
67
/*
 * API: Aligned complex-real
 *
 * Validates the length arguments with bounds_check(), clears the first `len`
 * output samples of `y` (two floats per sample) and then runs the
 * convolution, either through a NEON kernel or through the base
 * implementation.
 *
 * A NEON kernel is used only when the file is built with HAVE_NEON,
 * step <= 4 and h_len is one of 4, 8, 12, 16 or 20.  Every other
 * combination falls back to _base_convolve_real().
 *
 * NOTE(review): the NEON kernels are handed an input pointer rewound by
 * (h_len - 1) samples relative to `start`, so the caller presumably has to
 * provide that much valid history in front of the start sample - confirm.
 * NOTE(review): the NEON kernels receive neither `step` nor `offset`;
 * confirm their output matches the base implementation for step 2..4.
 *
 * Returns `len` on success, or -1 when bounds_check() rejects the arguments.
 */
int convolve_real(float *x, int x_len,
		  float *h, int h_len,
		  float *y, int y_len,
		  int start, int len,
		  int step, int offset)
{
	void (*conv_func)(float *, float *, float *, int) = NULL;

	if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
		return -1;

	/*
	 * Widen to size_t before multiplying: `len * 2` evaluated in int
	 * overflows (undefined behaviour) for len > INT_MAX / 2.
	 */
	memset(y, 0, (size_t)len * 2 * sizeof(*y));

#ifdef HAVE_NEON
	if (step <= 4) {
		switch (h_len) {
		case 4:
			conv_func = neon_conv_real4;
			break;
		case 8:
			conv_func = neon_conv_real8;
			break;
		case 12:
			conv_func = neon_conv_real12;
			break;
		case 16:
			conv_func = neon_conv_real16;
			break;
		case 20:
			conv_func = neon_conv_real20;
			break;
		default:
			/* No dedicated kernel: use the base implementation */
			break;
		}
	}
#endif
	if (conv_func) {
		/* Start (h_len - 1) samples before `start`; 2 floats per sample */
		conv_func(&x[2 * (-(h_len - 1) + start)],
			  h, y, len);
	} else {
		_base_convolve_real(x, x_len,
				    h, h_len,
				    y, y_len,
				    start, len, step, offset);
	}

	return len;
}
115
116
/*
 * API: Aligned complex-complex
 *
 * Validates the length arguments with bounds_check(), clears the first `len`
 * output samples of `y` (two floats per sample) and then runs the
 * convolution, either through the NEON path or through the base
 * implementation.
 *
 * The NEON path (neon_conv_cmplx_4n) is used only when the file is built
 * with HAVE_NEON, step <= 4 and h_len is a multiple of 4.  Every other
 * combination falls back to _base_convolve_complex().
 *
 * NOTE(review): the NEON path is handed an input pointer rewound by
 * (h_len - 1) samples relative to `start`, so the caller presumably has to
 * provide that much valid history in front of the start sample - confirm.
 * NOTE(review): the NEON path receives neither `step` nor `offset`;
 * confirm its output matches the base implementation for step 2..4.
 *
 * Returns `len` on success, or -1 when bounds_check() rejects the arguments.
 */
int convolve_complex(float *x, int x_len,
		     float *h, int h_len,
		     float *y, int y_len,
		     int start, int len,
		     int step, int offset)
{
	void (*conv_func)(float *, float *, float *, int, int) = NULL;

	if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
		return -1;

	/*
	 * Widen to size_t before multiplying: `len * 2` evaluated in int
	 * overflows (undefined behaviour) for len > INT_MAX / 2.
	 */
	memset(y, 0, (size_t)len * 2 * sizeof(*y));

#ifdef HAVE_NEON
	if (step <= 4 && (h_len % 4) == 0)
		conv_func = neon_conv_cmplx_4n;
#endif
	if (conv_func) {
		/* Start (h_len - 1) samples before `start`; 2 floats per sample */
		conv_func(&x[2 * (-(h_len - 1) + start)],
			  h, y, h_len, len);
	} else {
		_base_convolve_complex(x, x_len,
				       h, h_len,
				       y, y_len,
				       start, len, step, offset);
	}

	return len;
}