blob: 4bd3b0767c8581a882c00601c5e5d107f0e17ad3 [file] [log] [blame]
Neels Hofmeyr17518fe2017-06-20 04:35:06 +02001/*! \file conv_acc.c
2 * Accelerated Viterbi decoder implementation. */
Tom Tsou35536802016-11-24 19:24:32 +07003/*
Tom Tsou35536802016-11-24 19:24:32 +07004 * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
5 *
6 * All Rights Reserved
7 *
Harald Weltee08da972017-11-13 01:00:26 +09008 * SPDX-License-Identifier: GPL-2.0+
9 *
Tom Tsou35536802016-11-24 19:24:32 +070010 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
Tom Tsou35536802016-11-24 19:24:32 +070019 */
20
21#include <stdlib.h>
22#include <string.h>
23#include <errno.h>
24
Tom Tsou35536802016-11-24 19:24:32 +070025#include "config.h"
26
Tom Tsou34e228a2017-04-29 00:16:43 +070027#include <osmocom/core/conv.h>
28
Tom Tsou35536802016-11-24 19:24:32 +070029#define BIT2NRZ(REG,N) (((REG >> N) & 0x01) * 2 - 1) * -1
30#define NUM_STATES(K) (K == 7 ? 64 : 16)
Tom Tsou35536802016-11-24 19:24:32 +070031
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070032#define INIT_POINTERS(simd) \
33{ \
34 osmo_conv_metrics_k5_n2 = osmo_conv_##simd##_metrics_k5_n2; \
35 osmo_conv_metrics_k5_n3 = osmo_conv_##simd##_metrics_k5_n3; \
36 osmo_conv_metrics_k5_n4 = osmo_conv_##simd##_metrics_k5_n4; \
37 osmo_conv_metrics_k7_n2 = osmo_conv_##simd##_metrics_k7_n2; \
38 osmo_conv_metrics_k7_n3 = osmo_conv_##simd##_metrics_k7_n3; \
39 osmo_conv_metrics_k7_n4 = osmo_conv_##simd##_metrics_k7_n4; \
40 vdec_malloc = &osmo_conv_##simd##_vdec_malloc; \
41 vdec_free = &osmo_conv_##simd##_vdec_free; \
42}
43
Tom Tsou34e228a2017-04-29 00:16:43 +070044static int init_complete = 0;
45
46__attribute__ ((visibility("hidden"))) int avx2_supported = 0;
Harald Welteb93f60f2017-11-17 11:41:34 +010047__attribute__ ((visibility("hidden"))) int ssse3_supported = 0;
Tom Tsou34e228a2017-04-29 00:16:43 +070048__attribute__ ((visibility("hidden"))) int sse41_supported = 0;
49
50/**
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070051 * These pointers are being initialized at runtime by the
52 * osmo_conv_init() depending on supported SIMD extensions.
Tom Tsou34e228a2017-04-29 00:16:43 +070053 */
54static int16_t *(*vdec_malloc)(size_t n);
55static void (*vdec_free)(int16_t *ptr);
56
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070057void (*osmo_conv_metrics_k5_n2)(const int8_t *seq,
58 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
59void (*osmo_conv_metrics_k5_n3)(const int8_t *seq,
60 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
61void (*osmo_conv_metrics_k5_n4)(const int8_t *seq,
62 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
63void (*osmo_conv_metrics_k7_n2)(const int8_t *seq,
64 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
65void (*osmo_conv_metrics_k7_n3)(const int8_t *seq,
66 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
67void (*osmo_conv_metrics_k7_n4)(const int8_t *seq,
68 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
Tom Tsou34e228a2017-04-29 00:16:43 +070069
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070070/* Forward malloc wrappers */
71int16_t *osmo_conv_gen_vdec_malloc(size_t n);
72void osmo_conv_gen_vdec_free(int16_t *ptr);
73
Harald Welteb93f60f2017-11-17 11:41:34 +010074#if defined(HAVE_SSSE3)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070075int16_t *osmo_conv_sse_vdec_malloc(size_t n);
76void osmo_conv_sse_vdec_free(int16_t *ptr);
77#endif
78
Harald Welteb93f60f2017-11-17 11:41:34 +010079#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070080int16_t *osmo_conv_sse_avx_vdec_malloc(size_t n);
81void osmo_conv_sse_avx_vdec_free(int16_t *ptr);
Tom Tsou34e228a2017-04-29 00:16:43 +070082#endif
83
Eric3afc1d12020-07-23 02:16:46 +020084#ifdef HAVE_NEON
85int16_t *osmo_conv_neon_vdec_malloc(size_t n);
86void osmo_conv_neon_vdec_free(int16_t *ptr);
87#endif
88
Tom Tsou35536802016-11-24 19:24:32 +070089/* Forward Metric Units */
90void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
91 int16_t *sums, int16_t *paths, int norm);
92void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
93 int16_t *sums, int16_t *paths, int norm);
94void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
95 int16_t *sums, int16_t *paths, int norm);
96void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
97 int16_t *sums, int16_t *paths, int norm);
98void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
99 int16_t *sums, int16_t *paths, int norm);
100void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
101 int16_t *sums, int16_t *paths, int norm);
102
Harald Welteb93f60f2017-11-17 11:41:34 +0100103#if defined(HAVE_SSSE3)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700104void osmo_conv_sse_metrics_k5_n2(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700105 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700106void osmo_conv_sse_metrics_k5_n3(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700107 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700108void osmo_conv_sse_metrics_k5_n4(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700109 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700110void osmo_conv_sse_metrics_k7_n2(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700111 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700112void osmo_conv_sse_metrics_k7_n3(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700113 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700114void osmo_conv_sse_metrics_k7_n4(const int8_t *seq, const int16_t *out,
115 int16_t *sums, int16_t *paths, int norm);
116#endif
117
Harald Welteb93f60f2017-11-17 11:41:34 +0100118#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700119void osmo_conv_sse_avx_metrics_k5_n2(const int8_t *seq, const int16_t *out,
120 int16_t *sums, int16_t *paths, int norm);
121void osmo_conv_sse_avx_metrics_k5_n3(const int8_t *seq, const int16_t *out,
122 int16_t *sums, int16_t *paths, int norm);
123void osmo_conv_sse_avx_metrics_k5_n4(const int8_t *seq, const int16_t *out,
124 int16_t *sums, int16_t *paths, int norm);
125void osmo_conv_sse_avx_metrics_k7_n2(const int8_t *seq, const int16_t *out,
126 int16_t *sums, int16_t *paths, int norm);
127void osmo_conv_sse_avx_metrics_k7_n3(const int8_t *seq, const int16_t *out,
128 int16_t *sums, int16_t *paths, int norm);
129void osmo_conv_sse_avx_metrics_k7_n4(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700130 int16_t *sums, int16_t *paths, int norm);
131#endif
132
Eric3afc1d12020-07-23 02:16:46 +0200133#if defined(HAVE_NEON)
134void osmo_conv_neon_metrics_k5_n2(const int8_t *seq, const int16_t *out,
135 int16_t *sums, int16_t *paths, int norm);
136void osmo_conv_neon_metrics_k5_n3(const int8_t *seq, const int16_t *out,
137 int16_t *sums, int16_t *paths, int norm);
138void osmo_conv_neon_metrics_k5_n4(const int8_t *seq, const int16_t *out,
139 int16_t *sums, int16_t *paths, int norm);
140void osmo_conv_neon_metrics_k7_n2(const int8_t *seq, const int16_t *out,
141 int16_t *sums, int16_t *paths, int norm);
142void osmo_conv_neon_metrics_k7_n3(const int8_t *seq, const int16_t *out,
143 int16_t *sums, int16_t *paths, int norm);
144void osmo_conv_neon_metrics_k7_n4(const int8_t *seq, const int16_t *out,
145 int16_t *sums, int16_t *paths, int norm);
146#endif
147
Tom Tsou35536802016-11-24 19:24:32 +0700148/* Trellis State
149 * state - Internal lshift register value
150 * prev - Register values of previous 0 and 1 states
151 */
152struct vstate {
153 unsigned state;
154 unsigned prev[2];
155};
156
157/* Trellis Object
158 * num_states - Number of states in the trellis
159 * sums - Accumulated path metrics
160 * outputs - Trellis output values
161 * vals - Input value that led to each state
162 */
163struct vtrellis {
164 int num_states;
165 int16_t *sums;
166 int16_t *outputs;
167 uint8_t *vals;
168};
169
170/* Viterbi Decoder
171 * n - Code order
172 * k - Constraint length
173 * len - Horizontal length of trellis
174 * recursive - Set to '1' if the code is recursive
175 * intrvl - Normalization interval
176 * trellis - Trellis object
Tom Tsou35536802016-11-24 19:24:32 +0700177 * paths - Trellis paths
178 */
179struct vdecoder {
180 int n;
181 int k;
182 int len;
183 int recursive;
184 int intrvl;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700185 struct vtrellis trellis;
Tom Tsou35536802016-11-24 19:24:32 +0700186 int16_t **paths;
187
188 void (*metric_func)(const int8_t *, const int16_t *,
189 int16_t *, int16_t *, int);
190};
191
Tom Tsou35536802016-11-24 19:24:32 +0700192/* Accessor calls */
193static inline int conv_code_recursive(const struct osmo_conv_code *code)
194{
195 return code->next_term_output ? 1 : 0;
196}
197
198/* Left shift and mask for finding the previous state */
199static unsigned vstate_lshift(unsigned reg, int k, int val)
200{
201 unsigned mask;
202
203 if (k == 5)
204 mask = 0x0e;
205 else if (k == 7)
206 mask = 0x3e;
207 else
208 mask = 0;
209
210 return ((reg << 1) & mask) | val;
211}
212
213/* Bit endian manipulators */
214static inline unsigned bitswap2(unsigned v)
215{
216 return ((v & 0x02) >> 1) | ((v & 0x01) << 1);
217}
218
219static inline unsigned bitswap3(unsigned v)
220{
221 return ((v & 0x04) >> 2) | ((v & 0x02) >> 0) |
222 ((v & 0x01) << 2);
223}
224
225static inline unsigned bitswap4(unsigned v)
226{
227 return ((v & 0x08) >> 3) | ((v & 0x04) >> 1) |
228 ((v & 0x02) << 1) | ((v & 0x01) << 3);
229}
230
231static inline unsigned bitswap5(unsigned v)
232{
233 return ((v & 0x10) >> 4) | ((v & 0x08) >> 2) | ((v & 0x04) >> 0) |
234 ((v & 0x02) << 2) | ((v & 0x01) << 4);
235}
236
237static inline unsigned bitswap6(unsigned v)
238{
239 return ((v & 0x20) >> 5) | ((v & 0x10) >> 3) | ((v & 0x08) >> 1) |
240 ((v & 0x04) << 1) | ((v & 0x02) << 3) | ((v & 0x01) << 5);
241}
242
243static unsigned bitswap(unsigned v, unsigned n)
244{
245 switch (n) {
246 case 1:
247 return v;
248 case 2:
249 return bitswap2(v);
250 case 3:
251 return bitswap3(v);
252 case 4:
253 return bitswap4(v);
254 case 5:
255 return bitswap5(v);
256 case 6:
257 return bitswap6(v);
258 default:
259 return 0;
260 }
261}
262
263/* Generate non-recursive state output from generator state table
264 * Note that the shift register moves right (i.e. the most recent bit is
265 * shifted into the register at k-1 bit of the register), which is typical
266 * textbook representation. The API transition table expects the most recent
267 * bit in the low order bit, or left shift. A bitswap operation is required
268 * to accommodate the difference.
269 */
270static unsigned gen_output(struct vstate *state, int val,
271 const struct osmo_conv_code *code)
272{
273 unsigned out, prev;
274
275 prev = bitswap(state->prev[0], code->K - 1);
276 out = code->next_output[prev][val];
277 out = bitswap(out, code->N);
278
279 return out;
280}
281
282/* Populate non-recursive trellis state
283 * For a given state defined by the k-1 length shift register, find the
284 * value of the input bit that drove the trellis to that state. Also
285 * generate the N outputs of the generator polynomial at that state.
286 */
287static int gen_state_info(uint8_t *val, unsigned reg,
288 int16_t *output, const struct osmo_conv_code *code)
289{
290 int i;
291 unsigned out;
292 struct vstate state;
293
294 /* Previous '0' state */
295 state.state = reg;
296 state.prev[0] = vstate_lshift(reg, code->K, 0);
297 state.prev[1] = vstate_lshift(reg, code->K, 1);
298
299 *val = (reg >> (code->K - 2)) & 0x01;
300
301 /* Transition output */
302 out = gen_output(&state, *val, code);
303
304 /* Unpack to NRZ */
305 for (i = 0; i < code->N; i++)
306 output[i] = BIT2NRZ(out, i);
307
308 return 0;
309}
310
311/* Generate recursive state output from generator state table */
312static unsigned gen_recursive_output(struct vstate *state,
313 uint8_t *val, unsigned reg,
314 const struct osmo_conv_code *code, int pos)
315{
316 int val0, val1;
317 unsigned out, prev;
318
319 /* Previous '0' state */
320 prev = vstate_lshift(reg, code->K, 0);
321 prev = bitswap(prev, code->K - 1);
322
323 /* Input value */
324 val0 = (reg >> (code->K - 2)) & 0x01;
325 val1 = (code->next_term_output[prev] >> pos) & 0x01;
326 *val = val0 == val1 ? 0 : 1;
327
328 /* Wrapper for osmocom state access */
329 prev = bitswap(state->prev[0], code->K - 1);
330
331 /* Compute the transition output */
332 out = code->next_output[prev][*val];
333 out = bitswap(out, code->N);
334
335 return out;
336}
337
338/* Populate recursive trellis state
339 * The bit position of the systematic bit is not explicitly marked by the
340 * API, so it must be extracted from the generator table. Otherwise,
341 * populate the trellis similar to the non-recursive version.
342 * Non-systematic recursive codes are not supported.
343 */
344static int gen_recursive_state_info(uint8_t *val,
345 unsigned reg, int16_t *output, const struct osmo_conv_code *code)
346{
347 int i, j, pos = -1;
348 int ns = NUM_STATES(code->K);
349 unsigned out;
350 struct vstate state;
351
352 /* Previous '0' and '1' states */
353 state.state = reg;
354 state.prev[0] = vstate_lshift(reg, code->K, 0);
355 state.prev[1] = vstate_lshift(reg, code->K, 1);
356
357 /* Find recursive bit location */
358 for (i = 0; i < code->N; i++) {
359 for (j = 0; j < ns; j++) {
360 if ((code->next_output[j][0] >> i) & 0x01)
361 break;
362 }
363
364 if (j == ns) {
365 pos = i;
366 break;
367 }
368 }
369
370 /* Non-systematic recursive code not supported */
371 if (pos < 0)
372 return -EPROTO;
373
374 /* Transition output */
375 out = gen_recursive_output(&state, val, reg, code, pos);
376
377 /* Unpack to NRZ */
378 for (i = 0; i < code->N; i++)
379 output[i] = BIT2NRZ(out, i);
380
381 return 0;
382}
383
384/* Release the trellis */
385static void free_trellis(struct vtrellis *trellis)
386{
387 if (!trellis)
388 return;
389
Tom Tsou34e228a2017-04-29 00:16:43 +0700390 vdec_free(trellis->outputs);
391 vdec_free(trellis->sums);
Tom Tsou35536802016-11-24 19:24:32 +0700392 free(trellis->vals);
Tom Tsou35536802016-11-24 19:24:32 +0700393}
394
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700395/* Initialize the trellis object
Tom Tsou35536802016-11-24 19:24:32 +0700396 * Initialization consists of generating the outputs and output value of a
397 * given state. Due to trellis symmetry and anti-symmetry, only one of the
398 * transition paths is utilized by the butterfly operation in the forward
399 * recursion, so only one set of N outputs is required per state variable.
400 */
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700401static int generate_trellis(struct vdecoder *dec,
402 const struct osmo_conv_code *code)
Tom Tsou35536802016-11-24 19:24:32 +0700403{
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700404 struct vtrellis *trellis = &dec->trellis;
Tom Tsou35536802016-11-24 19:24:32 +0700405 int16_t *outputs;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700406 int i, rc;
Tom Tsou35536802016-11-24 19:24:32 +0700407
408 int ns = NUM_STATES(code->K);
Tom Tsou35536802016-11-24 19:24:32 +0700409 int olen = (code->N == 2) ? 2 : 4;
410
Tom Tsou35536802016-11-24 19:24:32 +0700411 trellis->num_states = ns;
412 trellis->sums = vdec_malloc(ns);
413 trellis->outputs = vdec_malloc(ns * olen);
414 trellis->vals = (uint8_t *) malloc(ns * sizeof(uint8_t));
415
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700416 if (!trellis->sums || !trellis->outputs || !trellis->vals) {
417 rc = -ENOMEM;
Tom Tsou35536802016-11-24 19:24:32 +0700418 goto fail;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700419 }
Tom Tsou35536802016-11-24 19:24:32 +0700420
421 /* Populate the trellis state objects */
422 for (i = 0; i < ns; i++) {
423 outputs = &trellis->outputs[olen * i];
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700424 if (dec->recursive) {
Tom Tsou35536802016-11-24 19:24:32 +0700425 rc = gen_recursive_state_info(&trellis->vals[i],
426 i, outputs, code);
427 } else {
428 rc = gen_state_info(&trellis->vals[i],
429 i, outputs, code);
430 }
Vadim Yanitskiy34da6b52017-06-19 05:41:49 +0700431
432 if (rc < 0)
433 goto fail;
434
435 /* Set accumulated path metrics to zero */
436 trellis->sums[i] = 0;
Tom Tsou35536802016-11-24 19:24:32 +0700437 }
438
Vadim Yanitskiy34da6b52017-06-19 05:41:49 +0700439 /**
440 * For termination other than tail-biting, initialize the zero state
441 * as the encoder starting state. Initialize with the maximum
442 * accumulated sum at length equal to the constraint length.
443 */
444 if (code->term != CONV_TERM_TAIL_BITING)
445 trellis->sums[0] = INT8_MAX * code->N * code->K;
Tom Tsou35536802016-11-24 19:24:32 +0700446
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700447 return 0;
Tom Tsou35536802016-11-24 19:24:32 +0700448
449fail:
450 free_trellis(trellis);
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700451 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700452}
453
Tom Tsou35536802016-11-24 19:24:32 +0700454static void _traceback(struct vdecoder *dec,
455 unsigned state, uint8_t *out, int len)
456{
457 int i;
458 unsigned path;
459
460 for (i = len - 1; i >= 0; i--) {
461 path = dec->paths[i][state] + 1;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700462 out[i] = dec->trellis.vals[state];
Tom Tsou35536802016-11-24 19:24:32 +0700463 state = vstate_lshift(state, dec->k, path);
464 }
465}
466
467static void _traceback_rec(struct vdecoder *dec,
468 unsigned state, uint8_t *out, int len)
469{
470 int i;
471 unsigned path;
472
473 for (i = len - 1; i >= 0; i--) {
474 path = dec->paths[i][state] + 1;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700475 out[i] = path ^ dec->trellis.vals[state];
Tom Tsou35536802016-11-24 19:24:32 +0700476 state = vstate_lshift(state, dec->k, path);
477 }
478}
479
480/* Traceback and generate decoded output
481 * Find the largest accumulated path metric at the final state except for
482 * the zero terminated case, where we assume the final state is always zero.
483 */
484static int traceback(struct vdecoder *dec, uint8_t *out, int term, int len)
485{
Sylvain Munautd5974e92021-12-21 19:45:34 +0100486 int i, j, sum, max = -1;
487 unsigned path, state = 0, state_scan;
Tom Tsou35536802016-11-24 19:24:32 +0700488
Sylvain Munautd5974e92021-12-21 19:45:34 +0100489 if (term == CONV_TERM_TAIL_BITING) {
490 for (i = 0; i < dec->trellis.num_states; i++) {
491 state_scan = i;
492 for (j = len - 1; j >= 0; j--) {
493 path = dec->paths[j][state_scan] + 1;
494 state_scan = vstate_lshift(state_scan, dec->k, path);
495 }
496 if (state_scan != i)
497 continue;
498 sum = dec->trellis.sums[i];
499 if (sum > max) {
500 max = sum;
501 state = i;
502 }
503 }
504 }
505
506 if ((max < 0) && (term != CONV_TERM_FLUSH)) {
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700507 for (i = 0; i < dec->trellis.num_states; i++) {
508 sum = dec->trellis.sums[i];
Tom Tsou35536802016-11-24 19:24:32 +0700509 if (sum > max) {
510 max = sum;
511 state = i;
512 }
513 }
514
515 if (max < 0)
516 return -EPROTO;
517 }
518
519 for (i = dec->len - 1; i >= len; i--) {
520 path = dec->paths[i][state] + 1;
521 state = vstate_lshift(state, dec->k, path);
522 }
523
524 if (dec->recursive)
525 _traceback_rec(dec, state, out, len);
526 else
527 _traceback(dec, state, out, len);
528
529 return 0;
530}
531
532/* Release decoder object */
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700533static void vdec_deinit(struct vdecoder *dec)
Tom Tsou35536802016-11-24 19:24:32 +0700534{
535 if (!dec)
536 return;
537
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700538 free_trellis(&dec->trellis);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700539
540 if (dec->paths != NULL) {
541 vdec_free(dec->paths[0]);
542 free(dec->paths);
543 }
Tom Tsou35536802016-11-24 19:24:32 +0700544}
545
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700546/* Initialize decoder object with code specific params
Tom Tsou35536802016-11-24 19:24:32 +0700547 * Subtract the constraint length K on the normalization interval to
548 * accommodate the initialization path metric at state zero.
549 */
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700550static int vdec_init(struct vdecoder *dec, const struct osmo_conv_code *code)
Tom Tsou35536802016-11-24 19:24:32 +0700551{
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700552 int i, ns, rc;
Tom Tsou35536802016-11-24 19:24:32 +0700553
554 ns = NUM_STATES(code->K);
555
Tom Tsou35536802016-11-24 19:24:32 +0700556 dec->n = code->N;
557 dec->k = code->K;
558 dec->recursive = conv_code_recursive(code);
559 dec->intrvl = INT16_MAX / (dec->n * INT8_MAX) - dec->k;
560
561 if (dec->k == 5) {
562 switch (dec->n) {
563 case 2:
Eric3afc1d12020-07-23 02:16:46 +0200564/* rach len 14 is too short for neon */
565#ifdef HAVE_NEON
566 if (code->len < 100)
567 dec->metric_func = osmo_conv_gen_metrics_k5_n2;
568 else
569#endif
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700570 dec->metric_func = osmo_conv_metrics_k5_n2;
Tom Tsou35536802016-11-24 19:24:32 +0700571 break;
572 case 3:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700573 dec->metric_func = osmo_conv_metrics_k5_n3;
Tom Tsou35536802016-11-24 19:24:32 +0700574 break;
575 case 4:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700576 dec->metric_func = osmo_conv_metrics_k5_n4;
Tom Tsou35536802016-11-24 19:24:32 +0700577 break;
578 default:
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700579 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700580 }
581 } else if (dec->k == 7) {
582 switch (dec->n) {
583 case 2:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700584 dec->metric_func = osmo_conv_metrics_k7_n2;
Tom Tsou35536802016-11-24 19:24:32 +0700585 break;
586 case 3:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700587 dec->metric_func = osmo_conv_metrics_k7_n3;
Tom Tsou35536802016-11-24 19:24:32 +0700588 break;
589 case 4:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700590 dec->metric_func = osmo_conv_metrics_k7_n4;
Tom Tsou35536802016-11-24 19:24:32 +0700591 break;
592 default:
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700593 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700594 }
595 } else {
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700596 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700597 }
598
599 if (code->term == CONV_TERM_FLUSH)
600 dec->len = code->len + code->K - 1;
601 else
602 dec->len = code->len;
603
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700604 rc = generate_trellis(dec, code);
605 if (rc)
606 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700607
608 dec->paths = (int16_t **) malloc(sizeof(int16_t *) * dec->len);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700609 if (!dec->paths)
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700610 goto enomem;
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700611
Tom Tsou35536802016-11-24 19:24:32 +0700612 dec->paths[0] = vdec_malloc(ns * dec->len);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700613 if (!dec->paths[0])
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700614 goto enomem;
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700615
Tom Tsou35536802016-11-24 19:24:32 +0700616 for (i = 1; i < dec->len; i++)
617 dec->paths[i] = &dec->paths[0][i * ns];
618
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700619 return 0;
Tom Tsou35536802016-11-24 19:24:32 +0700620
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700621enomem:
622 vdec_deinit(dec);
623 return -ENOMEM;
Tom Tsou35536802016-11-24 19:24:32 +0700624}
625
626/* Depuncture sequence with nagative value terminated puncturing matrix */
627static int depuncture(const int8_t *in, const int *punc, int8_t *out, int len)
628{
629 int i, n = 0, m = 0;
630
631 for (i = 0; i < len; i++) {
632 if (i == punc[n]) {
633 out[i] = 0;
634 n++;
635 continue;
636 }
637
638 out[i] = in[m++];
639 }
640
641 return 0;
642}
643
644/* Forward trellis recursion
645 * Generate branch metrics and path metrics with a combined function. Only
646 * accumulated path metric sums and path selections are stored. Normalize on
647 * the interval specified by the decoder.
648 */
649static void forward_traverse(struct vdecoder *dec, const int8_t *seq)
650{
Tom Tsou35536802016-11-24 19:24:32 +0700651 int i;
652
653 for (i = 0; i < dec->len; i++) {
654 dec->metric_func(&seq[dec->n * i],
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700655 dec->trellis.outputs,
656 dec->trellis.sums,
Tom Tsou35536802016-11-24 19:24:32 +0700657 dec->paths[i],
658 !(i % dec->intrvl));
659 }
660}
661
662/* Convolutional decode with a decoder object
663 * Initial puncturing run if necessary followed by the forward recursion.
664 * For tail-biting perform a second pass before running the backward
665 * traceback operation.
666 */
667static int conv_decode(struct vdecoder *dec, const int8_t *seq,
668 const int *punc, uint8_t *out, int len, int term)
669{
670 int8_t depunc[dec->len * dec->n];
671
Tom Tsou35536802016-11-24 19:24:32 +0700672 if (punc) {
673 depuncture(seq, punc, depunc, dec->len * dec->n);
674 seq = depunc;
675 }
676
677 /* Propagate through the trellis with interval normalization */
678 forward_traverse(dec, seq);
679
680 if (term == CONV_TERM_TAIL_BITING)
681 forward_traverse(dec, seq);
682
683 return traceback(dec, out, term, len);
684}
685
Tom Tsou34e228a2017-04-29 00:16:43 +0700686static void osmo_conv_init(void)
687{
688 init_complete = 1;
689
690#ifdef HAVE___BUILTIN_CPU_SUPPORTS
691 /* Detect CPU capabilities */
692 #ifdef HAVE_AVX2
693 avx2_supported = __builtin_cpu_supports("avx2");
694 #endif
695
Harald Welteb93f60f2017-11-17 11:41:34 +0100696 #ifdef HAVE_SSSE3
697 ssse3_supported = __builtin_cpu_supports("ssse3");
Tom Tsou34e228a2017-04-29 00:16:43 +0700698 #endif
699
700 #ifdef HAVE_SSE4_1
701 sse41_supported = __builtin_cpu_supports("sse4.1");
702 #endif
703#endif
704
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700705/**
706 * Usage of curly braces is mandatory,
707 * because we use multi-line define.
708 */
Harald Welteb93f60f2017-11-17 11:41:34 +0100709#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
710 if (ssse3_supported && avx2_supported) {
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700711 INIT_POINTERS(sse_avx);
Harald Welteb93f60f2017-11-17 11:41:34 +0100712 } else if (ssse3_supported) {
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700713 INIT_POINTERS(sse);
714 } else {
715 INIT_POINTERS(gen);
716 }
Harald Welteb93f60f2017-11-17 11:41:34 +0100717#elif defined(HAVE_SSSE3)
718 if (ssse3_supported) {
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700719 INIT_POINTERS(sse);
720 } else {
721 INIT_POINTERS(gen);
722 }
Eric3afc1d12020-07-23 02:16:46 +0200723#elif defined(HAVE_NEON)
724 INIT_POINTERS(neon);
Tom Tsou34e228a2017-04-29 00:16:43 +0700725#else
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700726 INIT_POINTERS(gen);
Tom Tsou34e228a2017-04-29 00:16:43 +0700727#endif
728}
729
Tom Tsou35536802016-11-24 19:24:32 +0700730/* All-in-one Viterbi decoding */
731int osmo_conv_decode_acc(const struct osmo_conv_code *code,
732 const sbit_t *input, ubit_t *output)
733{
734 int rc;
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700735 struct vdecoder dec;
Tom Tsou35536802016-11-24 19:24:32 +0700736
Tom Tsou34e228a2017-04-29 00:16:43 +0700737 if (!init_complete)
738 osmo_conv_init();
739
Tom Tsou35536802016-11-24 19:24:32 +0700740 if ((code->N < 2) || (code->N > 4) || (code->len < 1) ||
741 ((code->K != 5) && (code->K != 7)))
742 return -EINVAL;
743
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700744 rc = vdec_init(&dec, code);
745 if (rc)
746 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700747
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700748 rc = conv_decode(&dec, input, code->puncture,
Tom Tsou35536802016-11-24 19:24:32 +0700749 output, code->len, code->term);
750
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700751 vdec_deinit(&dec);
Tom Tsou35536802016-11-24 19:24:32 +0700752
753 return rc;
754}