blob: 28876738b790eeda0231f2193dc748e19f031373 [file] [log] [blame]
/*! \file conv_acc_generic.c
 * Accelerated Viterbi decoder implementation
 * for generic architectures without SSE support. */
/*
 * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
 *
 * All Rights Reserved
 *
 * SPDX-License-Identifier: GPL-2.0+
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
25
Tom Tsou34e228a2017-04-29 00:16:43 +070026#include <stdlib.h>
Tom Tsou35536802016-11-24 19:24:32 +070027#include <stdint.h>
28#include <string.h>
29
/* Add-Compare-Select (ACS-Butterfly)
 *
 * Reads the two accumulated path metrics sum[2 * state] and
 * sum[2 * state + 1], combines each of them with +metric / -metric and
 * keeps the larger candidate of each pair. One winner is written to
 * new_sum[0] / path[0], the other one to new_sum[num_states / 2] /
 * path[num_states / 2].
 *
 * Note that path selections are stored as -1 and 0 rather than 0 and 1.
 * This is to match the output format of the SSE packed compare
 * instruction 'pmaxuw'. A selection of -1 means that the candidate built
 * from sum[2 * state] won (ties included), 0 means that the candidate
 * built from sum[2 * state + 1] won.
 *
 * The candidates are computed as int and converted to int16_t on store.
 */
static void acs_butterfly(int state, int num_states,
	int16_t metric, int16_t *sum,
	int16_t *new_sum, int16_t *path)
{
	const int half = num_states / 2;

	/* Both predecessor metrics are fetched before anything is written */
	const int prev_even = sum[2 * state];
	const int prev_odd = sum[2 * state + 1];

	const int upper_even = prev_even + metric;
	const int upper_odd = prev_odd - metric;
	const int lower_even = prev_even - metric;
	const int lower_odd = prev_odd + metric;

	const int upper_takes_even = (upper_even >= upper_odd);
	const int lower_takes_even = (lower_even >= lower_odd);

	new_sum[0] = upper_takes_even ? upper_even : upper_odd;
	path[0] = upper_takes_even ? -1 : 0;

	new_sum[half] = lower_takes_even ? lower_even : lower_odd;
	path[half] = lower_takes_even ? -1 : 0;
}
67
/* Branch metrics unit N=2
 *
 * Fills metrics[0 .. num_states / 2 - 1]. Entry i is the sum of products
 * of the two input values seq[0..1] with the pair out[2 * i], out[2 * i + 1].
 * The sum is computed as int and converted to int16_t on store.
 */
static void gen_branch_metrics_n2(int num_states, const int8_t *seq,
	const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *pair = &out[2 * k];

		metrics[k] = seq[0] * pair[0] + seq[1] * pair[1];
	}
}
79
/* Branch metrics unit N=3
 *
 * Fills metrics[0 .. num_states / 2 - 1]. Entry i is the sum of products
 * of the three input values seq[0..2] with out[4 * i + 0 .. 4 * i + 2].
 * The table is walked in groups of four entries; the fourth entry of each
 * group is not read here.
 * The sum is computed as int and converted to int16_t on store.
 */
static void gen_branch_metrics_n3(int num_states, const int8_t *seq,
	const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *group = &out[4 * k];

		metrics[k] = seq[0] * group[0] +
			     seq[1] * group[1] +
			     seq[2] * group[2];
	}
}
92
/* Branch metrics unit N=4
 *
 * Fills metrics[0 .. num_states / 2 - 1]. Entry i is the sum of products
 * of the four input values seq[0..3] with out[4 * i + 0 .. 4 * i + 3].
 * The sum is computed as int and converted to int16_t on store.
 */
static void gen_branch_metrics_n4(int num_states, const int8_t *seq,
	const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *group = &out[4 * k];

		metrics[k] = seq[0] * group[0] +
			     seq[1] * group[1] +
			     seq[2] * group[2] +
			     seq[3] * group[3];
	}
}
106
/* Path metric unit
 *
 * Runs one ACS butterfly per branch metric (num_states / 2 of them) to
 * derive the next set of accumulated path metrics from 'sums', and stores
 * the matching path decisions in 'paths'. When 'norm' is non-zero, the
 * smallest new metric is subtracted from all of them before they are
 * copied back, so the minimum becomes zero. 'sums' is overwritten with the
 * num_states new values at the very end.
 */
static void gen_path_metrics(int num_states, int16_t *sums,
	int16_t *metrics, int16_t *paths, int norm)
{
	const int half = num_states / 2;
	/* Scratch copy: every butterfly must still see the old 'sums' */
	int16_t next[num_states];
	int16_t lowest;
	int s;

	for (s = 0; s < half; s++)
		acs_butterfly(s, num_states, metrics[s], sums, next + s, paths + s);

	if (norm != 0) {
		lowest = next[0];

		for (s = 1; s < num_states; s++) {
			if (next[s] < lowest)
				lowest = next[s];
		}

		for (s = 0; s < num_states; s++)
			next[s] = next[s] - lowest;
	}

	memcpy(sums, next, sizeof(next));
}
132
/* Not-aligned Memory Allocator
 *
 * Allocates room for 'n' int16_t elements with plain malloc(), i.e. without
 * any alignment request beyond what malloc() itself provides.
 *
 * Returns a pointer to the uninitialized buffer, or NULL if the allocation
 * fails or if the byte size of 'n' elements does not fit into size_t.
 * The buffer is released with osmo_conv_gen_vdec_free().
 */
__attribute__ ((visibility("hidden")))
int16_t *osmo_conv_gen_vdec_malloc(size_t n)
{
	/* Refuse requests whose byte count would wrap around: a wrapped
	 * product would silently yield a buffer smaller than requested. */
	if (n > SIZE_MAX / sizeof(int16_t))
		return NULL;

	return malloc(n * sizeof(int16_t));
}
139
/* Release a buffer by handing 'ptr' to free(); as with free() itself,
 * passing NULL is a no-op. */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_vdec_free(int16_t *ptr)
{
	free((void *) ptr);
}
145
/* 16-state branch-path metrics units (K=5) */

/* K=5, N=2: derive 8 branch metrics from 'seq' and 'out', then update the
 * 16 accumulated path metrics in 'sums' and store 16 path decisions in
 * 'paths'. A non-zero 'norm' requests normalization of the new sums. */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n2(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}
156
/* K=5, N=3: derive 8 branch metrics from 'seq' and 'out', then update the
 * 16 accumulated path metrics in 'sums' and store 16 path decisions in
 * 'paths'. A non-zero 'norm' requests normalization of the new sums. */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n3(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}
167
/* K=5, N=4: derive 8 branch metrics from 'seq' and 'out', then update the
 * 16 accumulated path metrics in 'sums' and store 16 path decisions in
 * 'paths'. A non-zero 'norm' requests normalization of the new sums. */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n4(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}
178
/* 64-state branch-path metrics units (K=7) */

/* K=7, N=2: derive 32 branch metrics from 'seq' and 'out', then update the
 * 64 accumulated path metrics in 'sums' and store 64 path decisions in
 * 'paths'. A non-zero 'norm' requests normalization of the new sums. */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n2(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}
190
/* K=7, N=3: derive 32 branch metrics from 'seq' and 'out', then update the
 * 64 accumulated path metrics in 'sums' and store 64 path decisions in
 * 'paths'. A non-zero 'norm' requests normalization of the new sums. */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n3(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}
201
/* K=7, N=4: derive 32 branch metrics from 'seq' and 'out', then update the
 * 64 accumulated path metrics in 'sums' and store 64 path decisions in
 * 'paths'. A non-zero 'norm' requests normalization of the new sums. */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n4(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}