blob: fb180e3d52b1e09f7920d43935ca7bcf267c5745 [file] [log] [blame]
Eric3afc1d12020-07-23 02:16:46 +02001/*! \file conv_acc_neon.c
2 * Accelerated Viterbi decoder implementation
3 * for architectures with only NEON available. */
4/*
5 * (C) 2020 by sysmocom - s.f.m.c. GmbH
6 * Author: Eric Wild
7 *
8 * All Rights Reserved
9 *
10 * SPDX-License-Identifier: GPL-2.0+
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
Eric3afc1d12020-07-23 02:16:46 +020021 */
22
23#include <stdlib.h>
24#include <stdint.h>
25#include <malloc.h>
26#include "config.h"
27
28#if defined(HAVE_NEON)
29#include <arm_neon.h>
30#endif
31
32/* align req is 16 on android because google was confused, 8 on sane platforms */
33#define NEON_ALIGN 8
34
35#include <conv_acc_neon_impl.h>
36
37/* Aligned Memory Allocator
38 * NEON requires 8-byte memory alignment. We store relevant trellis values
39 * (accumulated sums, outputs, and path decisions) as 16 bit signed integers
40 * so the allocated memory is casted as such.
41 */
42__attribute__ ((visibility("hidden")))
43int16_t *osmo_conv_neon_vdec_malloc(size_t n)
44{
45 return (int16_t *) memalign(NEON_ALIGN, sizeof(int16_t) * n);
46}
47
/*! Release a buffer previously returned by osmo_conv_neon_vdec_malloc().
 *  \param[in] ptr buffer to release; handed directly to free(), so NULL is
 *                 accepted and ignored
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_neon_vdec_free(int16_t *ptr)
{
	void *buf = ptr;

	free(buf);
}
53
/*! Entry point for the "k5_n2" NEON metrics helper (presumably constraint
 *  length 5 with 2 output bits per symbol, going by the name - TODO confirm).
 *
 *  Widens the first two 8 bit input values to 16 bit, repeats the pair so it
 *  fills a 4-element array { v0, v1, v0, v1 } and passes that array together
 *  with the remaining arguments to _neon_metrics_k5_n2().
 *  \param[in] val input values; only val[0] and val[1] are read here
 *  \param[in] out forwarded unchanged to the helper
 *  \param sums forwarded unchanged to the helper
 *  \param paths forwarded unchanged to the helper
 *  \param[in] norm forwarded unchanged to the helper
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_neon_metrics_k5_n2(const int8_t *val, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	const int16_t first = val[0];
	const int16_t second = val[1];
	const int16_t widened[4] = { first, second, first, second };

	_neon_metrics_k5_n2(widened, out, sums, paths, norm);
}
62
/*! Entry point for the "k5_n3" NEON metrics case (presumably constraint
 *  length 5 with 3 output bits per symbol, going by the name - TODO confirm).
 *
 *  Widens the first three 8 bit input values to 16 bit, sets the fourth
 *  element to zero and reuses the 4-value helper _neon_metrics_k5_n4().
 *  \param[in] val input values; val[0] through val[2] are read here
 *  \param[in] out forwarded unchanged to the helper
 *  \param sums forwarded unchanged to the helper
 *  \param paths forwarded unchanged to the helper
 *  \param[in] norm forwarded unchanged to the helper
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_neon_metrics_k5_n3(const int8_t *val, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	/* Fourth element is explicitly zero: there is no fourth input value */
	const int16_t widened[4] = {
		[0] = val[0],
		[1] = val[1],
		[2] = val[2],
		[3] = 0,
	};

	_neon_metrics_k5_n4(widened, out, sums, paths, norm);
}
71
/*! Entry point for the "k5_n4" NEON metrics helper (presumably constraint
 *  length 5 with 4 output bits per symbol, going by the name - TODO confirm).
 *
 *  Widens the first four 8 bit input values to 16 bit and passes them, along
 *  with the remaining arguments, to _neon_metrics_k5_n4().
 *  \param[in] val input values; val[0] through val[3] are read here
 *  \param[in] out forwarded unchanged to the helper
 *  \param sums forwarded unchanged to the helper
 *  \param paths forwarded unchanged to the helper
 *  \param[in] norm forwarded unchanged to the helper
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_neon_metrics_k5_n4(const int8_t *val, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	const int16_t widened[4] = {
		val[0],
		val[1],
		val[2],
		val[3],
	};

	_neon_metrics_k5_n4(widened, out, sums, paths, norm);
}
80
/*! Entry point for the "k7_n2" NEON metrics helper (presumably constraint
 *  length 7 with 2 output bits per symbol, going by the name - TODO confirm).
 *
 *  Widens the first two 8 bit input values to 16 bit, repeats the pair so it
 *  fills a 4-element array { v0, v1, v0, v1 } and passes that array together
 *  with the remaining arguments to _neon_metrics_k7_n2().
 *  \param[in] val input values; only val[0] and val[1] are read here
 *  \param[in] out forwarded unchanged to the helper
 *  \param sums forwarded unchanged to the helper
 *  \param paths forwarded unchanged to the helper
 *  \param[in] norm forwarded unchanged to the helper
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_neon_metrics_k7_n2(const int8_t *val, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	const int16_t first = val[0];
	const int16_t second = val[1];
	const int16_t widened[4] = { first, second, first, second };

	_neon_metrics_k7_n2(widened, out, sums, paths, norm);
}
89
/*! Entry point for the "k7_n3" NEON metrics case (presumably constraint
 *  length 7 with 3 output bits per symbol, going by the name - TODO confirm).
 *
 *  Widens the first three 8 bit input values to 16 bit, sets the fourth
 *  element to zero and reuses the 4-value helper _neon_metrics_k7_n4().
 *  \param[in] val input values; val[0] through val[2] are read here
 *  \param[in] out forwarded unchanged to the helper
 *  \param sums forwarded unchanged to the helper
 *  \param paths forwarded unchanged to the helper
 *  \param[in] norm forwarded unchanged to the helper
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_neon_metrics_k7_n3(const int8_t *val, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	/* Fourth element is explicitly zero: there is no fourth input value */
	const int16_t widened[4] = {
		[0] = val[0],
		[1] = val[1],
		[2] = val[2],
		[3] = 0,
	};

	_neon_metrics_k7_n4(widened, out, sums, paths, norm);
}
98
/*! Entry point for the "k7_n4" NEON metrics helper (presumably constraint
 *  length 7 with 4 output bits per symbol, going by the name - TODO confirm).
 *
 *  Widens the first four 8 bit input values to 16 bit and passes them, along
 *  with the remaining arguments, to _neon_metrics_k7_n4().
 *  \param[in] val input values; val[0] through val[3] are read here
 *  \param[in] out forwarded unchanged to the helper
 *  \param sums forwarded unchanged to the helper
 *  \param paths forwarded unchanged to the helper
 *  \param[in] norm forwarded unchanged to the helper
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_neon_metrics_k7_n4(const int8_t *val, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	const int16_t widened[4] = {
		val[0],
		val[1],
		val[2],
		val[3],
	};

	_neon_metrics_k7_n4(widened, out, sums, paths, norm);
}