/*
 * SSE type conversions
 * Copyright (C) 2013 Thomas Tsou <tom@tsou.cc>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 */
15
16#include <malloc.h>
17#include <string.h>
Thomas Tsou17bbb9b2013-10-30 21:24:40 -040018#include "convert.h"
Philipp Maiere8ae9fc2017-03-20 12:08:42 +010019#include "convert_sse_3.h"
20#include "convert_sse_4_1.h"
Thomas Tsou9471d762013-08-20 21:24:24 -040021
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#endif
25
/*
 * Architecture dependent function pointers.
 *
 * Each slot holds the routine that the public convert_*() wrappers in this
 * file call for a given direction and length class. The slots are filled in
 * by convert_init().
 */
struct convert_cpu_context {
	/* short -> float, used when len is a multiple of 16 */
	void (*convert_si16_ps_16n)(float *out, const short *in, int len);
	/* short -> float, any other len */
	void (*convert_si16_ps)(float *out, const short *in, int len);
	/* float -> short with scaling, used when len is a multiple of 16 */
	void (*convert_scale_ps_si16_16n)(short *out, const float *in,
					  float scale, int len);
	/* float -> short with scaling, used when len is a multiple of 8 (not 16) */
	void (*convert_scale_ps_si16_8n)(short *out, const float *in,
					 float scale, int len);
	/* float -> short with scaling, any other len */
	void (*convert_scale_ps_si16)(short *out, const float *in,
				      float scale, int len);
};

/*
 * Single file-local dispatch table. It has static storage and no
 * initializer, so every slot is a null pointer until convert_init() runs.
 */
static struct convert_cpu_context c;
36
Philipp Maier7e07cf22017-03-15 18:09:35 +010037void convert_init(void)
38{
Philipp Maierfe976982017-03-16 14:50:25 +010039 c.convert_scale_ps_si16_16n = base_convert_float_short;
40 c.convert_scale_ps_si16_8n = base_convert_float_short;
41 c.convert_scale_ps_si16 = base_convert_float_short;
42 c.convert_si16_ps_16n = base_convert_short_float;
43 c.convert_si16_ps = base_convert_short_float;
Philipp Maier7e07cf22017-03-15 18:09:35 +010044
Vadim Yanitskiy3bd763d2017-05-20 01:46:51 +030045#ifdef HAVE___BUILTIN_CPU_SUPPORTS
Philipp Maier7e07cf22017-03-15 18:09:35 +010046#ifdef HAVE_SSE4_1
47 if (__builtin_cpu_supports("sse4.1")) {
48 c.convert_si16_ps_16n = &_sse_convert_si16_ps_16n;
49 c.convert_si16_ps = &_sse_convert_si16_ps;
50 }
Thomas Tsou9471d762013-08-20 21:24:24 -040051#endif
52
Philipp Maier7e07cf22017-03-15 18:09:35 +010053#ifdef HAVE_SSE3
54 if (__builtin_cpu_supports("sse3")) {
55 c.convert_scale_ps_si16_16n = _sse_convert_scale_ps_si16_16n;
56 c.convert_scale_ps_si16_8n = _sse_convert_scale_ps_si16_8n;
57 c.convert_scale_ps_si16 = _sse_convert_scale_ps_si16;
58 }
59#endif
Vadim Yanitskiy3bd763d2017-05-20 01:46:51 +030060#endif
Philipp Maier7e07cf22017-03-15 18:09:35 +010061}
62
Tom Tsouf147b172015-03-25 12:55:11 -070063void convert_float_short(short *out, const float *in, float scale, int len)
Thomas Tsou9471d762013-08-20 21:24:24 -040064{
Thomas Tsou9471d762013-08-20 21:24:24 -040065 if (!(len % 16))
Philipp Maier7e07cf22017-03-15 18:09:35 +010066 c.convert_scale_ps_si16_16n(out, in, scale, len);
Thomas Tsou9471d762013-08-20 21:24:24 -040067 else if (!(len % 8))
Philipp Maier7e07cf22017-03-15 18:09:35 +010068 c.convert_scale_ps_si16_8n(out, in, scale, len);
Thomas Tsou9471d762013-08-20 21:24:24 -040069 else
Philipp Maier7e07cf22017-03-15 18:09:35 +010070 c.convert_scale_ps_si16(out, in, scale, len);
Thomas Tsou9471d762013-08-20 21:24:24 -040071}
72
Tom Tsouf147b172015-03-25 12:55:11 -070073void convert_short_float(float *out, const short *in, int len)
Thomas Tsou9471d762013-08-20 21:24:24 -040074{
Thomas Tsou9471d762013-08-20 21:24:24 -040075 if (!(len % 16))
Philipp Maier7e07cf22017-03-15 18:09:35 +010076 c.convert_si16_ps_16n(out, in, len);
Thomas Tsou9471d762013-08-20 21:24:24 -040077 else
Philipp Maier7e07cf22017-03-15 18:09:35 +010078 c.convert_si16_ps(out, in, len);
Thomas Tsou9471d762013-08-20 21:24:24 -040079}