blob: 0f6f7ca253c8412ca427f65254fbf1ef87b19a2a [file] [log] [blame]
Neels Hofmeyr17518fe2017-06-20 04:35:06 +02001/*! \file conv_acc.c
2 * Accelerated Viterbi decoder implementation. */
Tom Tsou35536802016-11-24 19:24:32 +07003/*
Tom Tsou35536802016-11-24 19:24:32 +07004 * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
5 *
6 * All Rights Reserved
7 *
Harald Weltee08da972017-11-13 01:00:26 +09008 * SPDX-License-Identifier: GPL-2.0+
9 *
Tom Tsou35536802016-11-24 19:24:32 +070010 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 */
24
25#include <stdlib.h>
26#include <string.h>
27#include <errno.h>
28
Tom Tsou35536802016-11-24 19:24:32 +070029#include "config.h"
30
Tom Tsou34e228a2017-04-29 00:16:43 +070031#include <osmocom/core/conv.h>
32
Tom Tsou35536802016-11-24 19:24:32 +070033#define BIT2NRZ(REG,N) (((REG >> N) & 0x01) * 2 - 1) * -1
34#define NUM_STATES(K) (K == 7 ? 64 : 16)
Tom Tsou35536802016-11-24 19:24:32 +070035
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070036#define INIT_POINTERS(simd) \
37{ \
38 osmo_conv_metrics_k5_n2 = osmo_conv_##simd##_metrics_k5_n2; \
39 osmo_conv_metrics_k5_n3 = osmo_conv_##simd##_metrics_k5_n3; \
40 osmo_conv_metrics_k5_n4 = osmo_conv_##simd##_metrics_k5_n4; \
41 osmo_conv_metrics_k7_n2 = osmo_conv_##simd##_metrics_k7_n2; \
42 osmo_conv_metrics_k7_n3 = osmo_conv_##simd##_metrics_k7_n3; \
43 osmo_conv_metrics_k7_n4 = osmo_conv_##simd##_metrics_k7_n4; \
44 vdec_malloc = &osmo_conv_##simd##_vdec_malloc; \
45 vdec_free = &osmo_conv_##simd##_vdec_free; \
46}
47
Tom Tsou34e228a2017-04-29 00:16:43 +070048static int init_complete = 0;
49
50__attribute__ ((visibility("hidden"))) int avx2_supported = 0;
Harald Welteb93f60f2017-11-17 11:41:34 +010051__attribute__ ((visibility("hidden"))) int ssse3_supported = 0;
Tom Tsou34e228a2017-04-29 00:16:43 +070052__attribute__ ((visibility("hidden"))) int sse41_supported = 0;
53
54/**
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070055 * These pointers are being initialized at runtime by the
56 * osmo_conv_init() depending on supported SIMD extensions.
Tom Tsou34e228a2017-04-29 00:16:43 +070057 */
58static int16_t *(*vdec_malloc)(size_t n);
59static void (*vdec_free)(int16_t *ptr);
60
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070061void (*osmo_conv_metrics_k5_n2)(const int8_t *seq,
62 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
63void (*osmo_conv_metrics_k5_n3)(const int8_t *seq,
64 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
65void (*osmo_conv_metrics_k5_n4)(const int8_t *seq,
66 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
67void (*osmo_conv_metrics_k7_n2)(const int8_t *seq,
68 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
69void (*osmo_conv_metrics_k7_n3)(const int8_t *seq,
70 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
71void (*osmo_conv_metrics_k7_n4)(const int8_t *seq,
72 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
Tom Tsou34e228a2017-04-29 00:16:43 +070073
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070074/* Forward malloc wrappers */
75int16_t *osmo_conv_gen_vdec_malloc(size_t n);
76void osmo_conv_gen_vdec_free(int16_t *ptr);
77
Harald Welteb93f60f2017-11-17 11:41:34 +010078#if defined(HAVE_SSSE3)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070079int16_t *osmo_conv_sse_vdec_malloc(size_t n);
80void osmo_conv_sse_vdec_free(int16_t *ptr);
81#endif
82
Harald Welteb93f60f2017-11-17 11:41:34 +010083#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070084int16_t *osmo_conv_sse_avx_vdec_malloc(size_t n);
85void osmo_conv_sse_avx_vdec_free(int16_t *ptr);
Tom Tsou34e228a2017-04-29 00:16:43 +070086#endif
87
Eric3afc1d12020-07-23 02:16:46 +020088#ifdef HAVE_NEON
89int16_t *osmo_conv_neon_vdec_malloc(size_t n);
90void osmo_conv_neon_vdec_free(int16_t *ptr);
91#endif
92
Tom Tsou35536802016-11-24 19:24:32 +070093/* Forward Metric Units */
94void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
95 int16_t *sums, int16_t *paths, int norm);
96void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
97 int16_t *sums, int16_t *paths, int norm);
98void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
99 int16_t *sums, int16_t *paths, int norm);
100void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
101 int16_t *sums, int16_t *paths, int norm);
102void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
103 int16_t *sums, int16_t *paths, int norm);
104void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
105 int16_t *sums, int16_t *paths, int norm);
106
Harald Welteb93f60f2017-11-17 11:41:34 +0100107#if defined(HAVE_SSSE3)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700108void osmo_conv_sse_metrics_k5_n2(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700109 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700110void osmo_conv_sse_metrics_k5_n3(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700111 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700112void osmo_conv_sse_metrics_k5_n4(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700113 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700114void osmo_conv_sse_metrics_k7_n2(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700115 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700116void osmo_conv_sse_metrics_k7_n3(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700117 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700118void osmo_conv_sse_metrics_k7_n4(const int8_t *seq, const int16_t *out,
119 int16_t *sums, int16_t *paths, int norm);
120#endif
121
Harald Welteb93f60f2017-11-17 11:41:34 +0100122#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700123void osmo_conv_sse_avx_metrics_k5_n2(const int8_t *seq, const int16_t *out,
124 int16_t *sums, int16_t *paths, int norm);
125void osmo_conv_sse_avx_metrics_k5_n3(const int8_t *seq, const int16_t *out,
126 int16_t *sums, int16_t *paths, int norm);
127void osmo_conv_sse_avx_metrics_k5_n4(const int8_t *seq, const int16_t *out,
128 int16_t *sums, int16_t *paths, int norm);
129void osmo_conv_sse_avx_metrics_k7_n2(const int8_t *seq, const int16_t *out,
130 int16_t *sums, int16_t *paths, int norm);
131void osmo_conv_sse_avx_metrics_k7_n3(const int8_t *seq, const int16_t *out,
132 int16_t *sums, int16_t *paths, int norm);
133void osmo_conv_sse_avx_metrics_k7_n4(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700134 int16_t *sums, int16_t *paths, int norm);
135#endif
136
Eric3afc1d12020-07-23 02:16:46 +0200137#if defined(HAVE_NEON)
138void osmo_conv_neon_metrics_k5_n2(const int8_t *seq, const int16_t *out,
139 int16_t *sums, int16_t *paths, int norm);
140void osmo_conv_neon_metrics_k5_n3(const int8_t *seq, const int16_t *out,
141 int16_t *sums, int16_t *paths, int norm);
142void osmo_conv_neon_metrics_k5_n4(const int8_t *seq, const int16_t *out,
143 int16_t *sums, int16_t *paths, int norm);
144void osmo_conv_neon_metrics_k7_n2(const int8_t *seq, const int16_t *out,
145 int16_t *sums, int16_t *paths, int norm);
146void osmo_conv_neon_metrics_k7_n3(const int8_t *seq, const int16_t *out,
147 int16_t *sums, int16_t *paths, int norm);
148void osmo_conv_neon_metrics_k7_n4(const int8_t *seq, const int16_t *out,
149 int16_t *sums, int16_t *paths, int norm);
150#endif
151
Tom Tsou35536802016-11-24 19:24:32 +0700152/* Trellis State
153 * state - Internal lshift register value
154 * prev - Register values of previous 0 and 1 states
155 */
156struct vstate {
157 unsigned state;
158 unsigned prev[2];
159};
160
161/* Trellis Object
162 * num_states - Number of states in the trellis
163 * sums - Accumulated path metrics
164 * outputs - Trellis output values
165 * vals - Input value that led to each state
166 */
167struct vtrellis {
168 int num_states;
169 int16_t *sums;
170 int16_t *outputs;
171 uint8_t *vals;
172};
173
174/* Viterbi Decoder
175 * n - Code order
176 * k - Constraint length
177 * len - Horizontal length of trellis
178 * recursive - Set to '1' if the code is recursive
179 * intrvl - Normalization interval
180 * trellis - Trellis object
Tom Tsou35536802016-11-24 19:24:32 +0700181 * paths - Trellis paths
182 */
183struct vdecoder {
184 int n;
185 int k;
186 int len;
187 int recursive;
188 int intrvl;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700189 struct vtrellis trellis;
Tom Tsou35536802016-11-24 19:24:32 +0700190 int16_t **paths;
191
192 void (*metric_func)(const int8_t *, const int16_t *,
193 int16_t *, int16_t *, int);
194};
195
Tom Tsou35536802016-11-24 19:24:32 +0700196/* Accessor calls */
197static inline int conv_code_recursive(const struct osmo_conv_code *code)
198{
199 return code->next_term_output ? 1 : 0;
200}
201
202/* Left shift and mask for finding the previous state */
203static unsigned vstate_lshift(unsigned reg, int k, int val)
204{
205 unsigned mask;
206
207 if (k == 5)
208 mask = 0x0e;
209 else if (k == 7)
210 mask = 0x3e;
211 else
212 mask = 0;
213
214 return ((reg << 1) & mask) | val;
215}
216
217/* Bit endian manipulators */
218static inline unsigned bitswap2(unsigned v)
219{
220 return ((v & 0x02) >> 1) | ((v & 0x01) << 1);
221}
222
223static inline unsigned bitswap3(unsigned v)
224{
225 return ((v & 0x04) >> 2) | ((v & 0x02) >> 0) |
226 ((v & 0x01) << 2);
227}
228
229static inline unsigned bitswap4(unsigned v)
230{
231 return ((v & 0x08) >> 3) | ((v & 0x04) >> 1) |
232 ((v & 0x02) << 1) | ((v & 0x01) << 3);
233}
234
235static inline unsigned bitswap5(unsigned v)
236{
237 return ((v & 0x10) >> 4) | ((v & 0x08) >> 2) | ((v & 0x04) >> 0) |
238 ((v & 0x02) << 2) | ((v & 0x01) << 4);
239}
240
241static inline unsigned bitswap6(unsigned v)
242{
243 return ((v & 0x20) >> 5) | ((v & 0x10) >> 3) | ((v & 0x08) >> 1) |
244 ((v & 0x04) << 1) | ((v & 0x02) << 3) | ((v & 0x01) << 5);
245}
246
247static unsigned bitswap(unsigned v, unsigned n)
248{
249 switch (n) {
250 case 1:
251 return v;
252 case 2:
253 return bitswap2(v);
254 case 3:
255 return bitswap3(v);
256 case 4:
257 return bitswap4(v);
258 case 5:
259 return bitswap5(v);
260 case 6:
261 return bitswap6(v);
262 default:
263 return 0;
264 }
265}
266
267/* Generate non-recursive state output from generator state table
268 * Note that the shift register moves right (i.e. the most recent bit is
269 * shifted into the register at k-1 bit of the register), which is typical
270 * textbook representation. The API transition table expects the most recent
271 * bit in the low order bit, or left shift. A bitswap operation is required
272 * to accommodate the difference.
273 */
274static unsigned gen_output(struct vstate *state, int val,
275 const struct osmo_conv_code *code)
276{
277 unsigned out, prev;
278
279 prev = bitswap(state->prev[0], code->K - 1);
280 out = code->next_output[prev][val];
281 out = bitswap(out, code->N);
282
283 return out;
284}
285
286/* Populate non-recursive trellis state
287 * For a given state defined by the k-1 length shift register, find the
288 * value of the input bit that drove the trellis to that state. Also
289 * generate the N outputs of the generator polynomial at that state.
290 */
291static int gen_state_info(uint8_t *val, unsigned reg,
292 int16_t *output, const struct osmo_conv_code *code)
293{
294 int i;
295 unsigned out;
296 struct vstate state;
297
298 /* Previous '0' state */
299 state.state = reg;
300 state.prev[0] = vstate_lshift(reg, code->K, 0);
301 state.prev[1] = vstate_lshift(reg, code->K, 1);
302
303 *val = (reg >> (code->K - 2)) & 0x01;
304
305 /* Transition output */
306 out = gen_output(&state, *val, code);
307
308 /* Unpack to NRZ */
309 for (i = 0; i < code->N; i++)
310 output[i] = BIT2NRZ(out, i);
311
312 return 0;
313}
314
315/* Generate recursive state output from generator state table */
316static unsigned gen_recursive_output(struct vstate *state,
317 uint8_t *val, unsigned reg,
318 const struct osmo_conv_code *code, int pos)
319{
320 int val0, val1;
321 unsigned out, prev;
322
323 /* Previous '0' state */
324 prev = vstate_lshift(reg, code->K, 0);
325 prev = bitswap(prev, code->K - 1);
326
327 /* Input value */
328 val0 = (reg >> (code->K - 2)) & 0x01;
329 val1 = (code->next_term_output[prev] >> pos) & 0x01;
330 *val = val0 == val1 ? 0 : 1;
331
332 /* Wrapper for osmocom state access */
333 prev = bitswap(state->prev[0], code->K - 1);
334
335 /* Compute the transition output */
336 out = code->next_output[prev][*val];
337 out = bitswap(out, code->N);
338
339 return out;
340}
341
342/* Populate recursive trellis state
343 * The bit position of the systematic bit is not explicitly marked by the
344 * API, so it must be extracted from the generator table. Otherwise,
345 * populate the trellis similar to the non-recursive version.
346 * Non-systematic recursive codes are not supported.
347 */
348static int gen_recursive_state_info(uint8_t *val,
349 unsigned reg, int16_t *output, const struct osmo_conv_code *code)
350{
351 int i, j, pos = -1;
352 int ns = NUM_STATES(code->K);
353 unsigned out;
354 struct vstate state;
355
356 /* Previous '0' and '1' states */
357 state.state = reg;
358 state.prev[0] = vstate_lshift(reg, code->K, 0);
359 state.prev[1] = vstate_lshift(reg, code->K, 1);
360
361 /* Find recursive bit location */
362 for (i = 0; i < code->N; i++) {
363 for (j = 0; j < ns; j++) {
364 if ((code->next_output[j][0] >> i) & 0x01)
365 break;
366 }
367
368 if (j == ns) {
369 pos = i;
370 break;
371 }
372 }
373
374 /* Non-systematic recursive code not supported */
375 if (pos < 0)
376 return -EPROTO;
377
378 /* Transition output */
379 out = gen_recursive_output(&state, val, reg, code, pos);
380
381 /* Unpack to NRZ */
382 for (i = 0; i < code->N; i++)
383 output[i] = BIT2NRZ(out, i);
384
385 return 0;
386}
387
388/* Release the trellis */
389static void free_trellis(struct vtrellis *trellis)
390{
391 if (!trellis)
392 return;
393
Tom Tsou34e228a2017-04-29 00:16:43 +0700394 vdec_free(trellis->outputs);
395 vdec_free(trellis->sums);
Tom Tsou35536802016-11-24 19:24:32 +0700396 free(trellis->vals);
Tom Tsou35536802016-11-24 19:24:32 +0700397}
398
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700399/* Initialize the trellis object
Tom Tsou35536802016-11-24 19:24:32 +0700400 * Initialization consists of generating the outputs and output value of a
401 * given state. Due to trellis symmetry and anti-symmetry, only one of the
402 * transition paths is utilized by the butterfly operation in the forward
403 * recursion, so only one set of N outputs is required per state variable.
404 */
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700405static int generate_trellis(struct vdecoder *dec,
406 const struct osmo_conv_code *code)
Tom Tsou35536802016-11-24 19:24:32 +0700407{
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700408 struct vtrellis *trellis = &dec->trellis;
Tom Tsou35536802016-11-24 19:24:32 +0700409 int16_t *outputs;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700410 int i, rc;
Tom Tsou35536802016-11-24 19:24:32 +0700411
412 int ns = NUM_STATES(code->K);
Tom Tsou35536802016-11-24 19:24:32 +0700413 int olen = (code->N == 2) ? 2 : 4;
414
Tom Tsou35536802016-11-24 19:24:32 +0700415 trellis->num_states = ns;
416 trellis->sums = vdec_malloc(ns);
417 trellis->outputs = vdec_malloc(ns * olen);
418 trellis->vals = (uint8_t *) malloc(ns * sizeof(uint8_t));
419
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700420 if (!trellis->sums || !trellis->outputs || !trellis->vals) {
421 rc = -ENOMEM;
Tom Tsou35536802016-11-24 19:24:32 +0700422 goto fail;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700423 }
Tom Tsou35536802016-11-24 19:24:32 +0700424
425 /* Populate the trellis state objects */
426 for (i = 0; i < ns; i++) {
427 outputs = &trellis->outputs[olen * i];
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700428 if (dec->recursive) {
Tom Tsou35536802016-11-24 19:24:32 +0700429 rc = gen_recursive_state_info(&trellis->vals[i],
430 i, outputs, code);
431 } else {
432 rc = gen_state_info(&trellis->vals[i],
433 i, outputs, code);
434 }
Vadim Yanitskiy34da6b52017-06-19 05:41:49 +0700435
436 if (rc < 0)
437 goto fail;
438
439 /* Set accumulated path metrics to zero */
440 trellis->sums[i] = 0;
Tom Tsou35536802016-11-24 19:24:32 +0700441 }
442
Vadim Yanitskiy34da6b52017-06-19 05:41:49 +0700443 /**
444 * For termination other than tail-biting, initialize the zero state
445 * as the encoder starting state. Initialize with the maximum
446 * accumulated sum at length equal to the constraint length.
447 */
448 if (code->term != CONV_TERM_TAIL_BITING)
449 trellis->sums[0] = INT8_MAX * code->N * code->K;
Tom Tsou35536802016-11-24 19:24:32 +0700450
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700451 return 0;
Tom Tsou35536802016-11-24 19:24:32 +0700452
453fail:
454 free_trellis(trellis);
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700455 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700456}
457
Tom Tsou35536802016-11-24 19:24:32 +0700458static void _traceback(struct vdecoder *dec,
459 unsigned state, uint8_t *out, int len)
460{
461 int i;
462 unsigned path;
463
464 for (i = len - 1; i >= 0; i--) {
465 path = dec->paths[i][state] + 1;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700466 out[i] = dec->trellis.vals[state];
Tom Tsou35536802016-11-24 19:24:32 +0700467 state = vstate_lshift(state, dec->k, path);
468 }
469}
470
471static void _traceback_rec(struct vdecoder *dec,
472 unsigned state, uint8_t *out, int len)
473{
474 int i;
475 unsigned path;
476
477 for (i = len - 1; i >= 0; i--) {
478 path = dec->paths[i][state] + 1;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700479 out[i] = path ^ dec->trellis.vals[state];
Tom Tsou35536802016-11-24 19:24:32 +0700480 state = vstate_lshift(state, dec->k, path);
481 }
482}
483
484/* Traceback and generate decoded output
485 * Find the largest accumulated path metric at the final state except for
486 * the zero terminated case, where we assume the final state is always zero.
487 */
488static int traceback(struct vdecoder *dec, uint8_t *out, int term, int len)
489{
490 int i, sum, max = -1;
491 unsigned path, state = 0;
492
493 if (term != CONV_TERM_FLUSH) {
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700494 for (i = 0; i < dec->trellis.num_states; i++) {
495 sum = dec->trellis.sums[i];
Tom Tsou35536802016-11-24 19:24:32 +0700496 if (sum > max) {
497 max = sum;
498 state = i;
499 }
500 }
501
502 if (max < 0)
503 return -EPROTO;
504 }
505
506 for (i = dec->len - 1; i >= len; i--) {
507 path = dec->paths[i][state] + 1;
508 state = vstate_lshift(state, dec->k, path);
509 }
510
511 if (dec->recursive)
512 _traceback_rec(dec, state, out, len);
513 else
514 _traceback(dec, state, out, len);
515
516 return 0;
517}
518
519/* Release decoder object */
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700520static void vdec_deinit(struct vdecoder *dec)
Tom Tsou35536802016-11-24 19:24:32 +0700521{
522 if (!dec)
523 return;
524
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700525 free_trellis(&dec->trellis);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700526
527 if (dec->paths != NULL) {
528 vdec_free(dec->paths[0]);
529 free(dec->paths);
530 }
Tom Tsou35536802016-11-24 19:24:32 +0700531}
532
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700533/* Initialize decoder object with code specific params
Tom Tsou35536802016-11-24 19:24:32 +0700534 * Subtract the constraint length K on the normalization interval to
535 * accommodate the initialization path metric at state zero.
536 */
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700537static int vdec_init(struct vdecoder *dec, const struct osmo_conv_code *code)
Tom Tsou35536802016-11-24 19:24:32 +0700538{
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700539 int i, ns, rc;
Tom Tsou35536802016-11-24 19:24:32 +0700540
541 ns = NUM_STATES(code->K);
542
Tom Tsou35536802016-11-24 19:24:32 +0700543 dec->n = code->N;
544 dec->k = code->K;
545 dec->recursive = conv_code_recursive(code);
546 dec->intrvl = INT16_MAX / (dec->n * INT8_MAX) - dec->k;
547
548 if (dec->k == 5) {
549 switch (dec->n) {
550 case 2:
Eric3afc1d12020-07-23 02:16:46 +0200551/* rach len 14 is too short for neon */
552#ifdef HAVE_NEON
553 if (code->len < 100)
554 dec->metric_func = osmo_conv_gen_metrics_k5_n2;
555 else
556#endif
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700557 dec->metric_func = osmo_conv_metrics_k5_n2;
Tom Tsou35536802016-11-24 19:24:32 +0700558 break;
559 case 3:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700560 dec->metric_func = osmo_conv_metrics_k5_n3;
Tom Tsou35536802016-11-24 19:24:32 +0700561 break;
562 case 4:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700563 dec->metric_func = osmo_conv_metrics_k5_n4;
Tom Tsou35536802016-11-24 19:24:32 +0700564 break;
565 default:
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700566 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700567 }
568 } else if (dec->k == 7) {
569 switch (dec->n) {
570 case 2:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700571 dec->metric_func = osmo_conv_metrics_k7_n2;
Tom Tsou35536802016-11-24 19:24:32 +0700572 break;
573 case 3:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700574 dec->metric_func = osmo_conv_metrics_k7_n3;
Tom Tsou35536802016-11-24 19:24:32 +0700575 break;
576 case 4:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700577 dec->metric_func = osmo_conv_metrics_k7_n4;
Tom Tsou35536802016-11-24 19:24:32 +0700578 break;
579 default:
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700580 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700581 }
582 } else {
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700583 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700584 }
585
586 if (code->term == CONV_TERM_FLUSH)
587 dec->len = code->len + code->K - 1;
588 else
589 dec->len = code->len;
590
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700591 rc = generate_trellis(dec, code);
592 if (rc)
593 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700594
595 dec->paths = (int16_t **) malloc(sizeof(int16_t *) * dec->len);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700596 if (!dec->paths)
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700597 goto enomem;
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700598
Tom Tsou35536802016-11-24 19:24:32 +0700599 dec->paths[0] = vdec_malloc(ns * dec->len);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700600 if (!dec->paths[0])
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700601 goto enomem;
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700602
Tom Tsou35536802016-11-24 19:24:32 +0700603 for (i = 1; i < dec->len; i++)
604 dec->paths[i] = &dec->paths[0][i * ns];
605
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700606 return 0;
Tom Tsou35536802016-11-24 19:24:32 +0700607
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700608enomem:
609 vdec_deinit(dec);
610 return -ENOMEM;
Tom Tsou35536802016-11-24 19:24:32 +0700611}
612
613/* Depuncture sequence with nagative value terminated puncturing matrix */
614static int depuncture(const int8_t *in, const int *punc, int8_t *out, int len)
615{
616 int i, n = 0, m = 0;
617
618 for (i = 0; i < len; i++) {
619 if (i == punc[n]) {
620 out[i] = 0;
621 n++;
622 continue;
623 }
624
625 out[i] = in[m++];
626 }
627
628 return 0;
629}
630
631/* Forward trellis recursion
632 * Generate branch metrics and path metrics with a combined function. Only
633 * accumulated path metric sums and path selections are stored. Normalize on
634 * the interval specified by the decoder.
635 */
636static void forward_traverse(struct vdecoder *dec, const int8_t *seq)
637{
Tom Tsou35536802016-11-24 19:24:32 +0700638 int i;
639
640 for (i = 0; i < dec->len; i++) {
641 dec->metric_func(&seq[dec->n * i],
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700642 dec->trellis.outputs,
643 dec->trellis.sums,
Tom Tsou35536802016-11-24 19:24:32 +0700644 dec->paths[i],
645 !(i % dec->intrvl));
646 }
647}
648
649/* Convolutional decode with a decoder object
650 * Initial puncturing run if necessary followed by the forward recursion.
651 * For tail-biting perform a second pass before running the backward
652 * traceback operation.
653 */
654static int conv_decode(struct vdecoder *dec, const int8_t *seq,
655 const int *punc, uint8_t *out, int len, int term)
656{
657 int8_t depunc[dec->len * dec->n];
658
Tom Tsou35536802016-11-24 19:24:32 +0700659 if (punc) {
660 depuncture(seq, punc, depunc, dec->len * dec->n);
661 seq = depunc;
662 }
663
664 /* Propagate through the trellis with interval normalization */
665 forward_traverse(dec, seq);
666
667 if (term == CONV_TERM_TAIL_BITING)
668 forward_traverse(dec, seq);
669
670 return traceback(dec, out, term, len);
671}
672
Tom Tsou34e228a2017-04-29 00:16:43 +0700673static void osmo_conv_init(void)
674{
675 init_complete = 1;
676
677#ifdef HAVE___BUILTIN_CPU_SUPPORTS
678 /* Detect CPU capabilities */
679 #ifdef HAVE_AVX2
680 avx2_supported = __builtin_cpu_supports("avx2");
681 #endif
682
Harald Welteb93f60f2017-11-17 11:41:34 +0100683 #ifdef HAVE_SSSE3
684 ssse3_supported = __builtin_cpu_supports("ssse3");
Tom Tsou34e228a2017-04-29 00:16:43 +0700685 #endif
686
687 #ifdef HAVE_SSE4_1
688 sse41_supported = __builtin_cpu_supports("sse4.1");
689 #endif
690#endif
691
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700692/**
693 * Usage of curly braces is mandatory,
694 * because we use multi-line define.
695 */
Harald Welteb93f60f2017-11-17 11:41:34 +0100696#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
697 if (ssse3_supported && avx2_supported) {
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700698 INIT_POINTERS(sse_avx);
Harald Welteb93f60f2017-11-17 11:41:34 +0100699 } else if (ssse3_supported) {
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700700 INIT_POINTERS(sse);
701 } else {
702 INIT_POINTERS(gen);
703 }
Harald Welteb93f60f2017-11-17 11:41:34 +0100704#elif defined(HAVE_SSSE3)
705 if (ssse3_supported) {
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700706 INIT_POINTERS(sse);
707 } else {
708 INIT_POINTERS(gen);
709 }
Eric3afc1d12020-07-23 02:16:46 +0200710#elif defined(HAVE_NEON)
711 INIT_POINTERS(neon);
Tom Tsou34e228a2017-04-29 00:16:43 +0700712#else
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700713 INIT_POINTERS(gen);
Tom Tsou34e228a2017-04-29 00:16:43 +0700714#endif
715}
716
Tom Tsou35536802016-11-24 19:24:32 +0700717/* All-in-one Viterbi decoding */
718int osmo_conv_decode_acc(const struct osmo_conv_code *code,
719 const sbit_t *input, ubit_t *output)
720{
721 int rc;
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700722 struct vdecoder dec;
Tom Tsou35536802016-11-24 19:24:32 +0700723
Tom Tsou34e228a2017-04-29 00:16:43 +0700724 if (!init_complete)
725 osmo_conv_init();
726
Tom Tsou35536802016-11-24 19:24:32 +0700727 if ((code->N < 2) || (code->N > 4) || (code->len < 1) ||
728 ((code->K != 5) && (code->K != 7)))
729 return -EINVAL;
730
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700731 rc = vdec_init(&dec, code);
732 if (rc)
733 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700734
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700735 rc = conv_decode(&dec, input, code->puncture,
Tom Tsou35536802016-11-24 19:24:32 +0700736 output, code->len, code->term);
737
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700738 vdec_deinit(&dec);
Tom Tsou35536802016-11-24 19:24:32 +0700739
740 return rc;
741}