Blame - Transceiver52M/x86/convert.c - osmo-trx

blob: 862a2e738ad1427e2f8fd872f9771a2a0f3b0c5a [file] [log] [blame]

Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	1	/*
				2	* SSE type conversions
				3	* Copyright (C) 2013 Thomas Tsou <tom@tsou.cc>
				4	*
				5	* This library is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU Lesser General Public
				7	* License as published by the Free Software Foundation; either
				8	* version 2.1 of the License, or (at your option) any later version.
				9	*
				10	* This library is distributed in the hope that it will be useful,
				11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				13	* Lesser General Public License for more details.
				14	*
				15	* You should have received a copy of the GNU Lesser General Public
				16	* License along with this library; if not, write to the Free Software
				17	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
				18	*/
				19
				20	#include <malloc.h>
				21	#include <string.h>
Thomas Tsou	17bbb9b	2013-10-30 21:24:40 -0400	[diff] [blame]	22	#include "convert.h"
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	23
				24	#ifdef HAVE_CONFIG_H
				25	#include "config.h"
				26	#endif
				27
				28	#ifdef HAVE_SSE3
				29	#include <xmmintrin.h>
				30	#include <emmintrin.h>
				31
				32	#ifdef HAVE_SSE4_1
				33	#include <smmintrin.h>
				34
				/* 16N 16-bit signed integers converted to single precision floats */
/*
 * out: destination buffer, written with unaligned stores
 * in:  source 16-bit signed samples, read with unaligned loads
 * len: sample count; only the len / 16 whole groups of 16 samples are
 *      converted, any remaining len % 16 samples are left untouched
 */
static void _sse_convert_si16_ps_16n(float *restrict out,
				     const short *restrict in,
				     int len)
{
	__m128i m0, m1, m2, m3, m4, m5;
	__m128 m6, m7, m8, m9;

	for (int i = 0; i < len / 16; i++) {
		/* Load (unaligned) packed 16-bit integers, 8 per register */
		m0 = _mm_loadu_si128((const __m128i *) &in[16 * i + 0]);
		m1 = _mm_loadu_si128((const __m128i *) &in[16 * i + 8]);

		/*
		 * Unpack: _mm_cvtepi16_epi32 only widens the low four 16-bit
		 * lanes, so swap the 64-bit halves to reach the upper four.
		 */
		m2 = _mm_cvtepi16_epi32(m0);
		m4 = _mm_cvtepi16_epi32(m1);
		m0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2));
		m1 = _mm_shuffle_epi32(m1, _MM_SHUFFLE(1, 0, 3, 2));
		m3 = _mm_cvtepi16_epi32(m0);
		m5 = _mm_cvtepi16_epi32(m1);

		/* Convert 32-bit integers to floats */
		m6 = _mm_cvtepi32_ps(m2);
		m7 = _mm_cvtepi32_ps(m3);
		m8 = _mm_cvtepi32_ps(m4);
		m9 = _mm_cvtepi32_ps(m5);

		/* Store (unaligned) in the original sample order */
		_mm_storeu_ps(&out[16 * i + 0], m6);
		_mm_storeu_ps(&out[16 * i + 4], m7);
		_mm_storeu_ps(&out[16 * i + 8], m8);
		_mm_storeu_ps(&out[16 * i + 12], m9);
	}
}
				69
				/* 16-bit signed integer conversion with remainder */
/*
 * Converts len samples for any len: whole groups of 16 samples go through
 * _sse_convert_si16_ps_16n(), the trailing len % 16 samples are converted
 * one at a time.
 *
 * out: destination buffer for len floats
 * in:  source buffer of len 16-bit signed samples
 * len: number of samples to convert
 */
static void _sse_convert_si16_ps(float *restrict out,
				 const short *restrict in,
				 int len)
{
	/* Index of the first sample not covered by the 16-sample groups */
	int start = len / 16 * 16;

	_sse_convert_si16_ps_16n(out, in, len);

	for (int i = 0; i < len % 16; i++)
		out[start + i] = in[start + i];
}
				82	#endif /* HAVE_SSE4_1 */
				83
				/* 8N single precision floats scaled and converted to 16-bit signed integer */
/*
 * out:   destination buffer, written with unaligned stores
 * in:    source floats, read with unaligned loads
 * scale: factor every sample is multiplied by before conversion
 * len:   sample count; only the len / 8 whole groups of 8 samples are
 *        converted, any remaining len % 8 samples are left untouched
 *
 * The float to integer step uses _mm_cvtps_epi32 (rounds according to the
 * current SSE rounding mode) and _mm_packs_epi32 (saturates to the 16-bit
 * signed range).
 */
static void _sse_convert_scale_ps_si16_8n(short *restrict out,
					  const float *restrict in,
					  float scale, int len)
{
	__m128 m0, m1, m2;
	__m128i m4, m5;

	/* Broadcast the scale factor once; it does not change per iteration */
	m2 = _mm_load1_ps(&scale);

	for (int i = 0; i < len / 8; i++) {
		/* Load (unaligned) packed floats */
		m0 = _mm_loadu_ps(&in[8 * i + 0]);
		m1 = _mm_loadu_ps(&in[8 * i + 4]);

		/* Scale */
		m0 = _mm_mul_ps(m0, m2);
		m1 = _mm_mul_ps(m1, m2);

		/* Convert */
		m4 = _mm_cvtps_epi32(m0);
		m5 = _mm_cvtps_epi32(m1);

		/* Pack and store */
		m5 = _mm_packs_epi32(m4, m5);
		_mm_storeu_si128((__m128i *) &out[8 * i], m5);
	}
}
				111
				/* 8N single precision floats scaled and converted with remainder */
/*
 * Converts len samples for any len: whole groups of 8 samples go through
 * _sse_convert_scale_ps_si16_8n(), the trailing len % 8 samples are scaled
 * and converted one at a time.
 *
 * out:   destination buffer for len 16-bit signed samples
 * in:    source buffer of len floats
 * scale: factor every sample is multiplied by before conversion
 * len:   number of samples to convert
 *
 * NOTE: the scalar tail relies on the plain C float to short conversion,
 * which truncates toward zero and does not saturate, whereas the 8-sample
 * groups are rounded and saturated by the SSE instructions.
 */
static void _sse_convert_scale_ps_si16(short *restrict out,
				       const float *restrict in,
				       float scale, int len)
{
	/* Index of the first sample not covered by the 8-sample groups */
	int start = len / 8 * 8;

	_sse_convert_scale_ps_si16_8n(out, in, scale, len);

	for (int i = 0; i < len % 8; i++)
		out[start + i] = in[start + i] * scale;
}
				124
				/* 16N single precision floats scaled and converted to 16-bit signed integer */
/*
 * out:   destination buffer, written with unaligned stores
 * in:    source floats, read with unaligned loads
 * scale: factor every sample is multiplied by before conversion
 * len:   sample count; only the len / 16 whole groups of 16 samples are
 *        converted, any remaining len % 16 samples are left untouched
 *
 * The float to integer step uses _mm_cvtps_epi32 (rounds according to the
 * current SSE rounding mode) and _mm_packs_epi32 (saturates to the 16-bit
 * signed range).
 */
static void _sse_convert_scale_ps_si16_16n(short *restrict out,
					   const float *restrict in,
					   float scale, int len)
{
	__m128 m0, m1, m2, m3, m4;
	__m128i m5, m6, m7, m8;

	/* Broadcast the scale factor once; it does not change per iteration */
	m4 = _mm_load1_ps(&scale);

	for (int i = 0; i < len / 16; i++) {
		/* Load (unaligned) packed floats */
		m0 = _mm_loadu_ps(&in[16 * i + 0]);
		m1 = _mm_loadu_ps(&in[16 * i + 4]);
		m2 = _mm_loadu_ps(&in[16 * i + 8]);
		m3 = _mm_loadu_ps(&in[16 * i + 12]);

		/* Scale */
		m0 = _mm_mul_ps(m0, m4);
		m1 = _mm_mul_ps(m1, m4);
		m2 = _mm_mul_ps(m2, m4);
		m3 = _mm_mul_ps(m3, m4);

		/* Convert */
		m5 = _mm_cvtps_epi32(m0);
		m6 = _mm_cvtps_epi32(m1);
		m7 = _mm_cvtps_epi32(m2);
		m8 = _mm_cvtps_epi32(m3);

		/* Pack and store */
		m5 = _mm_packs_epi32(m5, m6);
		m7 = _mm_packs_epi32(m7, m8);
		_mm_storeu_si128((__m128i *) &out[16 * i + 0], m5);
		_mm_storeu_si128((__m128i *) &out[16 * i + 8], m7);
	}
}
				160	#else /* HAVE_SSE3 */
/*
 * Portable fallback: scale floats and convert them to 16-bit signed integers.
 *
 * out:   destination buffer for len 16-bit signed samples
 * in:    source buffer of len floats
 * scale: factor every sample is multiplied by before conversion
 * len:   number of samples to convert
 *
 * The conversion is the plain C float to short conversion (truncation
 * toward zero, no saturation).
 */
static void convert_scale_ps_si16(short *out, const float *in,
				  float scale, int len)
{
	for (int i = 0; i < len; i++)
		out[i] = in[i] * scale;
}
				167	#endif
				168
Thomas Tsou	69762fd	2013-11-07 22:54:15 -0500	[diff] [blame]	169	#ifndef HAVE_SSE4_1
/*
 * Portable fallback: convert 16-bit signed integers to floats.
 *
 * out: destination buffer for len floats
 * in:  source buffer of len 16-bit signed samples
 * len: number of samples to convert
 */
static void convert_si16_ps(float *out, const short *in, int len)
{
	for (int i = 0; i < len; i++)
		out[i] = in[i];
}
				175	#endif
				176
Tom Tsou	f147b17	2015-03-25 12:55:11 -0700	[diff] [blame]	177	void convert_float_short(short out, const float in, float scale, int len)
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	178	{
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	179	void (conv_func)(short , const float *, float, int);
				180
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	181	#ifdef HAVE_SSE3
				182	if (!(len % 16))
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	183	conv_func = _sse_convert_scale_ps_si16_16n;
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	184	else if (!(len % 8))
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	185	conv_func = _sse_convert_scale_ps_si16_8n;
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	186	else
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	187	conv_func = _sse_convert_scale_ps_si16;
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	188	#else
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	189	conv_func = convert_scale_ps_si16;
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	190	#endif
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	191
				192	conv_func(out, in, scale, len);
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	193	}
				194
Tom Tsou	f147b17	2015-03-25 12:55:11 -0700	[diff] [blame]	195	void convert_short_float(float out, const short in, int len)
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	196	{
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	197	void (conv_func) (float , const short *, int);
				198
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	199	#ifdef HAVE_SSE4_1
				200	if (!(len % 16))
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	201	conv_func = _sse_convert_si16_ps_16n;
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	202	else
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	203	conv_func = _sse_convert_si16_ps;
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	204	#else
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	205	conv_func = convert_si16_ps;
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	206	#endif
Philipp Maier	131f82b	2017-03-15 12:39:25 +0100	[diff] [blame^]	207
				208	conv_func(out, in, len);
Thomas Tsou	9471d76	2013-08-20 21:24:24 -0400	[diff] [blame]	209	}