/*
 * ARM NEON Scaling
 * Copyright (C) 2013 Thomas Tsou <tom@tsou.cc>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Assembler setup: GNU as, ARM unified syntax. There is no .fpu directive
 * in this file, so NEON has to be enabled by the assembler flags.
 */
        .syntax unified
        .text
        .align 2
        .global neon_scale_4n
        .type neon_scale_4n, %function

/*
 * neon_scale_4n - multiply a vector of complex floats by one complex constant
 *
 * Presumably called from C as
 *     void neon_scale_4n(const float *in, const float *scale,
 *                        float *out, int n4);
 * (prototype inferred from register usage - confirm against the caller).
 *
 * In:
 *   r0 = input, interleaved { re, im } single-precision pairs
 *   r1 = scale factor, a single { re, im } pair
 *   r2 = output, interleaved { re, im } single-precision pairs
 *   r3 = iteration count; every iteration consumes and produces
 *        4 complex samples (8 floats, 32 bytes)
 *
 * Out:
 *   out[i] = in[i] * scale for 4 * r3 complex samples, i.e.
 *     out.re = scale.re * in.re - scale.im * in.im
 *     out.im = scale.re * in.im + scale.im * in.re
 *
 * A count of zero (or a negative count when read as a signed int) returns
 * immediately without touching memory. Without this check the decrement at
 * the bottom of the loop would wrap around and run far past both buffers.
 * Counts of 2^31 or more cannot describe a real buffer in a 32-bit address
 * space (32 bytes per iteration), so the signed comparison rejects no
 * usable value.
 *
 * Clobbers: r0, r2, r3, q0-q3, q8-q11, condition flags.
 * Stack:    not used (leaf function).
 */
neon_scale_4n:
        cmp             r3, #0
        ble             .Lscale_done            /* nothing to do */

        /* Load the constant once and splat it across two quad registers. */
        vld1.64         {d0}, [r1]              /* d0 = { scale.re, scale.im } */
        vdup.32         q1, d0[1]               /* q1 = scale.im x4 (before d0 is overwritten) */
        vdup.32         q0, d0[0]               /* q0 = scale.re x4 */

.Lscale_loop:
        /*
         * De-interleaving load: q2 = 4 real parts, q3 = 4 imaginary parts.
         * Writeback advances r0 by the 32 bytes just read.
         */
        vld2.32         {q2-q3}, [r0]!

        vmul.f32        q8,  q0, q2             /* scale.re * in.re */
        vmul.f32        q9,  q1, q3             /* scale.im * in.im */
        vmul.f32        q10, q0, q3             /* scale.re * in.im */
        vmul.f32        q11, q1, q2             /* scale.im * in.re */
        vsub.f32        q8,  q8,  q9            /* real part of the product */
        vadd.f32        q9,  q10, q11           /* imaginary part of the product */

        /* Re-interleave { re, im } on store; r2 advances by 32 bytes. */
        vst2.32         {q8-q9}, [r2]!

        subs            r3, r3, #1
        bne             .Lscale_loop

.Lscale_done:
        bx              lr
        .size neon_scale_4n, .-neon_scale_4n
        .section .note.GNU-stack,"",%progbits