/*
 * NEON complex multiplication
 * Copyright (C) 2012,2013 Thomas Tsou <tom@tsou.cc>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* Assembler setup: ARM unified syntax, code section, 4-byte alignment. */
	.syntax unified
	.text
	.align	2
	.global	neon_cmplx_mul_4n
	.type	neon_cmplx_mul_4n, %function

/*
 * neon_cmplx_mul_4n - element-wise product of two vectors of interleaved
 * (re, im) single-precision complex values, four complex values (32 bytes
 * per buffer) per loop iteration.
 *
 * Presumed C prototype (the declaration is not visible in this file;
 * confirm against the caller):
 *   void neon_cmplx_mul_4n(float *out, float *a, float *b, int n);
 *
 * In:    r0 = destination buffer
 *        r1 = first operand buffer
 *        r2 = second operand buffer
 *        r3 = iteration count; each iteration consumes 8 floats from r1 and
 *             r2 and produces 8 floats at r0
 * Out:   nothing returned; 8 * r3 floats written at the original r0
 * Clobb: r0-r3 (pointers post-incremented, counter decremented),
 *        q0-q3, q8-q13, flags
 * Stack: none. Only caller-saved NEON registers are used, so nothing has
 *        to be pushed.
 *
 * A count of zero (or a negative count when viewed as signed) returns
 * immediately without touching memory. Without this guard the bottom-tested
 * loop would wrap the counter and run through memory far past the buffers.
 */
neon_cmplx_mul_4n:
	cmp		r3, #0
	ble		.Lcmplx_mul_4n_done	/* nothing to do for n <= 0 */
.Lcmplx_mul_4n_loop:
	vld2.32		{q0-q1}, [r1]!		/* q0 = a.re[0..3], q1 = a.im[0..3] */
	vld2.32		{q2-q3}, [r2]!		/* q2 = b.re[0..3], q3 = b.im[0..3] */
	vmul.f32	q10, q0, q2		/* a.re * b.re */
	vmul.f32	q11, q1, q3		/* a.im * b.im */
	vmul.f32	q12, q0, q3		/* a.re * b.im */
	vmul.f32	q13, q2, q1		/* b.re * a.im */
	vsub.f32	q8, q10, q11		/* out.re = a.re*b.re - a.im*b.im */
	vadd.f32	q9, q12, q13		/* out.im = a.re*b.im + b.re*a.im */
	vst2.32		{q8-q9}, [r0]!		/* re-interleave and store 4 results */
	subs		r3, #1
	bne		.Lcmplx_mul_4n_loop	/* r3 = iterations remaining */
.Lcmplx_mul_4n_done:
	bx		lr
	.size	neon_cmplx_mul_4n, .-neon_cmplx_mul_4n
	.section	.note.GNU-stack,"",%progbits