blob: c16e4364334d890139d0fb62fe45a5f62ec775c8 [file] [log] [blame]
Neels Hofmeyr17518fe2017-06-20 04:35:06 +02001/*! \file conv_acc.c
2 * Accelerated Viterbi decoder implementation. */
Tom Tsou35536802016-11-24 19:24:32 +07003/*
Tom Tsou35536802016-11-24 19:24:32 +07004 * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
5 *
6 * All Rights Reserved
7 *
Harald Weltee08da972017-11-13 01:00:26 +09008 * SPDX-License-Identifier: GPL-2.0+
9 *
Tom Tsou35536802016-11-24 19:24:32 +070010 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 */
24
25#include <stdlib.h>
26#include <string.h>
27#include <errno.h>
28
Tom Tsou35536802016-11-24 19:24:32 +070029#include "config.h"
30
Tom Tsou34e228a2017-04-29 00:16:43 +070031#include <osmocom/core/conv.h>
32
Tom Tsou35536802016-11-24 19:24:32 +070033#define BIT2NRZ(REG,N) (((REG >> N) & 0x01) * 2 - 1) * -1
34#define NUM_STATES(K) (K == 7 ? 64 : 16)
Tom Tsou35536802016-11-24 19:24:32 +070035
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070036#define INIT_POINTERS(simd) \
37{ \
38 osmo_conv_metrics_k5_n2 = osmo_conv_##simd##_metrics_k5_n2; \
39 osmo_conv_metrics_k5_n3 = osmo_conv_##simd##_metrics_k5_n3; \
40 osmo_conv_metrics_k5_n4 = osmo_conv_##simd##_metrics_k5_n4; \
41 osmo_conv_metrics_k7_n2 = osmo_conv_##simd##_metrics_k7_n2; \
42 osmo_conv_metrics_k7_n3 = osmo_conv_##simd##_metrics_k7_n3; \
43 osmo_conv_metrics_k7_n4 = osmo_conv_##simd##_metrics_k7_n4; \
44 vdec_malloc = &osmo_conv_##simd##_vdec_malloc; \
45 vdec_free = &osmo_conv_##simd##_vdec_free; \
46}
47
Tom Tsou34e228a2017-04-29 00:16:43 +070048static int init_complete = 0;
49
50__attribute__ ((visibility("hidden"))) int avx2_supported = 0;
Harald Welteb93f60f2017-11-17 11:41:34 +010051__attribute__ ((visibility("hidden"))) int ssse3_supported = 0;
Tom Tsou34e228a2017-04-29 00:16:43 +070052__attribute__ ((visibility("hidden"))) int sse41_supported = 0;
53
54/**
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070055 * These pointers are being initialized at runtime by the
56 * osmo_conv_init() depending on supported SIMD extensions.
Tom Tsou34e228a2017-04-29 00:16:43 +070057 */
58static int16_t *(*vdec_malloc)(size_t n);
59static void (*vdec_free)(int16_t *ptr);
60
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070061void (*osmo_conv_metrics_k5_n2)(const int8_t *seq,
62 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
63void (*osmo_conv_metrics_k5_n3)(const int8_t *seq,
64 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
65void (*osmo_conv_metrics_k5_n4)(const int8_t *seq,
66 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
67void (*osmo_conv_metrics_k7_n2)(const int8_t *seq,
68 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
69void (*osmo_conv_metrics_k7_n3)(const int8_t *seq,
70 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
71void (*osmo_conv_metrics_k7_n4)(const int8_t *seq,
72 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
Tom Tsou34e228a2017-04-29 00:16:43 +070073
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070074/* Forward malloc wrappers */
75int16_t *osmo_conv_gen_vdec_malloc(size_t n);
76void osmo_conv_gen_vdec_free(int16_t *ptr);
77
Harald Welteb93f60f2017-11-17 11:41:34 +010078#if defined(HAVE_SSSE3)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070079int16_t *osmo_conv_sse_vdec_malloc(size_t n);
80void osmo_conv_sse_vdec_free(int16_t *ptr);
81#endif
82
Harald Welteb93f60f2017-11-17 11:41:34 +010083#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070084int16_t *osmo_conv_sse_avx_vdec_malloc(size_t n);
85void osmo_conv_sse_avx_vdec_free(int16_t *ptr);
Tom Tsou34e228a2017-04-29 00:16:43 +070086#endif
87
Tom Tsou35536802016-11-24 19:24:32 +070088/* Forward Metric Units */
89void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
90 int16_t *sums, int16_t *paths, int norm);
91void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
92 int16_t *sums, int16_t *paths, int norm);
93void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
94 int16_t *sums, int16_t *paths, int norm);
95void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
96 int16_t *sums, int16_t *paths, int norm);
97void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
98 int16_t *sums, int16_t *paths, int norm);
99void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
100 int16_t *sums, int16_t *paths, int norm);
101
Harald Welteb93f60f2017-11-17 11:41:34 +0100102#if defined(HAVE_SSSE3)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700103void osmo_conv_sse_metrics_k5_n2(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700104 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700105void osmo_conv_sse_metrics_k5_n3(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700106 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700107void osmo_conv_sse_metrics_k5_n4(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700108 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700109void osmo_conv_sse_metrics_k7_n2(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700110 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700111void osmo_conv_sse_metrics_k7_n3(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700112 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700113void osmo_conv_sse_metrics_k7_n4(const int8_t *seq, const int16_t *out,
114 int16_t *sums, int16_t *paths, int norm);
115#endif
116
Harald Welteb93f60f2017-11-17 11:41:34 +0100117#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700118void osmo_conv_sse_avx_metrics_k5_n2(const int8_t *seq, const int16_t *out,
119 int16_t *sums, int16_t *paths, int norm);
120void osmo_conv_sse_avx_metrics_k5_n3(const int8_t *seq, const int16_t *out,
121 int16_t *sums, int16_t *paths, int norm);
122void osmo_conv_sse_avx_metrics_k5_n4(const int8_t *seq, const int16_t *out,
123 int16_t *sums, int16_t *paths, int norm);
124void osmo_conv_sse_avx_metrics_k7_n2(const int8_t *seq, const int16_t *out,
125 int16_t *sums, int16_t *paths, int norm);
126void osmo_conv_sse_avx_metrics_k7_n3(const int8_t *seq, const int16_t *out,
127 int16_t *sums, int16_t *paths, int norm);
128void osmo_conv_sse_avx_metrics_k7_n4(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700129 int16_t *sums, int16_t *paths, int norm);
130#endif
131
Tom Tsou35536802016-11-24 19:24:32 +0700132/* Trellis State
133 * state - Internal lshift register value
134 * prev - Register values of previous 0 and 1 states
135 */
136struct vstate {
137 unsigned state;
138 unsigned prev[2];
139};
140
141/* Trellis Object
142 * num_states - Number of states in the trellis
143 * sums - Accumulated path metrics
144 * outputs - Trellis output values
145 * vals - Input value that led to each state
146 */
147struct vtrellis {
148 int num_states;
149 int16_t *sums;
150 int16_t *outputs;
151 uint8_t *vals;
152};
153
154/* Viterbi Decoder
155 * n - Code order
156 * k - Constraint length
157 * len - Horizontal length of trellis
158 * recursive - Set to '1' if the code is recursive
159 * intrvl - Normalization interval
160 * trellis - Trellis object
Tom Tsou35536802016-11-24 19:24:32 +0700161 * paths - Trellis paths
162 */
163struct vdecoder {
164 int n;
165 int k;
166 int len;
167 int recursive;
168 int intrvl;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700169 struct vtrellis trellis;
Tom Tsou35536802016-11-24 19:24:32 +0700170 int16_t **paths;
171
172 void (*metric_func)(const int8_t *, const int16_t *,
173 int16_t *, int16_t *, int);
174};
175
Tom Tsou35536802016-11-24 19:24:32 +0700176/* Accessor calls */
177static inline int conv_code_recursive(const struct osmo_conv_code *code)
178{
179 return code->next_term_output ? 1 : 0;
180}
181
182/* Left shift and mask for finding the previous state */
183static unsigned vstate_lshift(unsigned reg, int k, int val)
184{
185 unsigned mask;
186
187 if (k == 5)
188 mask = 0x0e;
189 else if (k == 7)
190 mask = 0x3e;
191 else
192 mask = 0;
193
194 return ((reg << 1) & mask) | val;
195}
196
197/* Bit endian manipulators */
198static inline unsigned bitswap2(unsigned v)
199{
200 return ((v & 0x02) >> 1) | ((v & 0x01) << 1);
201}
202
203static inline unsigned bitswap3(unsigned v)
204{
205 return ((v & 0x04) >> 2) | ((v & 0x02) >> 0) |
206 ((v & 0x01) << 2);
207}
208
209static inline unsigned bitswap4(unsigned v)
210{
211 return ((v & 0x08) >> 3) | ((v & 0x04) >> 1) |
212 ((v & 0x02) << 1) | ((v & 0x01) << 3);
213}
214
215static inline unsigned bitswap5(unsigned v)
216{
217 return ((v & 0x10) >> 4) | ((v & 0x08) >> 2) | ((v & 0x04) >> 0) |
218 ((v & 0x02) << 2) | ((v & 0x01) << 4);
219}
220
221static inline unsigned bitswap6(unsigned v)
222{
223 return ((v & 0x20) >> 5) | ((v & 0x10) >> 3) | ((v & 0x08) >> 1) |
224 ((v & 0x04) << 1) | ((v & 0x02) << 3) | ((v & 0x01) << 5);
225}
226
227static unsigned bitswap(unsigned v, unsigned n)
228{
229 switch (n) {
230 case 1:
231 return v;
232 case 2:
233 return bitswap2(v);
234 case 3:
235 return bitswap3(v);
236 case 4:
237 return bitswap4(v);
238 case 5:
239 return bitswap5(v);
240 case 6:
241 return bitswap6(v);
242 default:
243 return 0;
244 }
245}
246
247/* Generate non-recursive state output from generator state table
248 * Note that the shift register moves right (i.e. the most recent bit is
249 * shifted into the register at k-1 bit of the register), which is typical
250 * textbook representation. The API transition table expects the most recent
251 * bit in the low order bit, or left shift. A bitswap operation is required
252 * to accommodate the difference.
253 */
254static unsigned gen_output(struct vstate *state, int val,
255 const struct osmo_conv_code *code)
256{
257 unsigned out, prev;
258
259 prev = bitswap(state->prev[0], code->K - 1);
260 out = code->next_output[prev][val];
261 out = bitswap(out, code->N);
262
263 return out;
264}
265
266/* Populate non-recursive trellis state
267 * For a given state defined by the k-1 length shift register, find the
268 * value of the input bit that drove the trellis to that state. Also
269 * generate the N outputs of the generator polynomial at that state.
270 */
271static int gen_state_info(uint8_t *val, unsigned reg,
272 int16_t *output, const struct osmo_conv_code *code)
273{
274 int i;
275 unsigned out;
276 struct vstate state;
277
278 /* Previous '0' state */
279 state.state = reg;
280 state.prev[0] = vstate_lshift(reg, code->K, 0);
281 state.prev[1] = vstate_lshift(reg, code->K, 1);
282
283 *val = (reg >> (code->K - 2)) & 0x01;
284
285 /* Transition output */
286 out = gen_output(&state, *val, code);
287
288 /* Unpack to NRZ */
289 for (i = 0; i < code->N; i++)
290 output[i] = BIT2NRZ(out, i);
291
292 return 0;
293}
294
295/* Generate recursive state output from generator state table */
296static unsigned gen_recursive_output(struct vstate *state,
297 uint8_t *val, unsigned reg,
298 const struct osmo_conv_code *code, int pos)
299{
300 int val0, val1;
301 unsigned out, prev;
302
303 /* Previous '0' state */
304 prev = vstate_lshift(reg, code->K, 0);
305 prev = bitswap(prev, code->K - 1);
306
307 /* Input value */
308 val0 = (reg >> (code->K - 2)) & 0x01;
309 val1 = (code->next_term_output[prev] >> pos) & 0x01;
310 *val = val0 == val1 ? 0 : 1;
311
312 /* Wrapper for osmocom state access */
313 prev = bitswap(state->prev[0], code->K - 1);
314
315 /* Compute the transition output */
316 out = code->next_output[prev][*val];
317 out = bitswap(out, code->N);
318
319 return out;
320}
321
322/* Populate recursive trellis state
323 * The bit position of the systematic bit is not explicitly marked by the
324 * API, so it must be extracted from the generator table. Otherwise,
325 * populate the trellis similar to the non-recursive version.
326 * Non-systematic recursive codes are not supported.
327 */
328static int gen_recursive_state_info(uint8_t *val,
329 unsigned reg, int16_t *output, const struct osmo_conv_code *code)
330{
331 int i, j, pos = -1;
332 int ns = NUM_STATES(code->K);
333 unsigned out;
334 struct vstate state;
335
336 /* Previous '0' and '1' states */
337 state.state = reg;
338 state.prev[0] = vstate_lshift(reg, code->K, 0);
339 state.prev[1] = vstate_lshift(reg, code->K, 1);
340
341 /* Find recursive bit location */
342 for (i = 0; i < code->N; i++) {
343 for (j = 0; j < ns; j++) {
344 if ((code->next_output[j][0] >> i) & 0x01)
345 break;
346 }
347
348 if (j == ns) {
349 pos = i;
350 break;
351 }
352 }
353
354 /* Non-systematic recursive code not supported */
355 if (pos < 0)
356 return -EPROTO;
357
358 /* Transition output */
359 out = gen_recursive_output(&state, val, reg, code, pos);
360
361 /* Unpack to NRZ */
362 for (i = 0; i < code->N; i++)
363 output[i] = BIT2NRZ(out, i);
364
365 return 0;
366}
367
368/* Release the trellis */
369static void free_trellis(struct vtrellis *trellis)
370{
371 if (!trellis)
372 return;
373
Tom Tsou34e228a2017-04-29 00:16:43 +0700374 vdec_free(trellis->outputs);
375 vdec_free(trellis->sums);
Tom Tsou35536802016-11-24 19:24:32 +0700376 free(trellis->vals);
Tom Tsou35536802016-11-24 19:24:32 +0700377}
378
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700379/* Initialize the trellis object
Tom Tsou35536802016-11-24 19:24:32 +0700380 * Initialization consists of generating the outputs and output value of a
381 * given state. Due to trellis symmetry and anti-symmetry, only one of the
382 * transition paths is utilized by the butterfly operation in the forward
383 * recursion, so only one set of N outputs is required per state variable.
384 */
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700385static int generate_trellis(struct vdecoder *dec,
386 const struct osmo_conv_code *code)
Tom Tsou35536802016-11-24 19:24:32 +0700387{
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700388 struct vtrellis *trellis = &dec->trellis;
Tom Tsou35536802016-11-24 19:24:32 +0700389 int16_t *outputs;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700390 int i, rc;
Tom Tsou35536802016-11-24 19:24:32 +0700391
392 int ns = NUM_STATES(code->K);
Tom Tsou35536802016-11-24 19:24:32 +0700393 int olen = (code->N == 2) ? 2 : 4;
394
Tom Tsou35536802016-11-24 19:24:32 +0700395 trellis->num_states = ns;
396 trellis->sums = vdec_malloc(ns);
397 trellis->outputs = vdec_malloc(ns * olen);
398 trellis->vals = (uint8_t *) malloc(ns * sizeof(uint8_t));
399
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700400 if (!trellis->sums || !trellis->outputs || !trellis->vals) {
401 rc = -ENOMEM;
Tom Tsou35536802016-11-24 19:24:32 +0700402 goto fail;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700403 }
Tom Tsou35536802016-11-24 19:24:32 +0700404
405 /* Populate the trellis state objects */
406 for (i = 0; i < ns; i++) {
407 outputs = &trellis->outputs[olen * i];
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700408 if (dec->recursive) {
Tom Tsou35536802016-11-24 19:24:32 +0700409 rc = gen_recursive_state_info(&trellis->vals[i],
410 i, outputs, code);
411 } else {
412 rc = gen_state_info(&trellis->vals[i],
413 i, outputs, code);
414 }
Vadim Yanitskiy34da6b52017-06-19 05:41:49 +0700415
416 if (rc < 0)
417 goto fail;
418
419 /* Set accumulated path metrics to zero */
420 trellis->sums[i] = 0;
Tom Tsou35536802016-11-24 19:24:32 +0700421 }
422
Vadim Yanitskiy34da6b52017-06-19 05:41:49 +0700423 /**
424 * For termination other than tail-biting, initialize the zero state
425 * as the encoder starting state. Initialize with the maximum
426 * accumulated sum at length equal to the constraint length.
427 */
428 if (code->term != CONV_TERM_TAIL_BITING)
429 trellis->sums[0] = INT8_MAX * code->N * code->K;
Tom Tsou35536802016-11-24 19:24:32 +0700430
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700431 return 0;
Tom Tsou35536802016-11-24 19:24:32 +0700432
433fail:
434 free_trellis(trellis);
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700435 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700436}
437
Tom Tsou35536802016-11-24 19:24:32 +0700438static void _traceback(struct vdecoder *dec,
439 unsigned state, uint8_t *out, int len)
440{
441 int i;
442 unsigned path;
443
444 for (i = len - 1; i >= 0; i--) {
445 path = dec->paths[i][state] + 1;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700446 out[i] = dec->trellis.vals[state];
Tom Tsou35536802016-11-24 19:24:32 +0700447 state = vstate_lshift(state, dec->k, path);
448 }
449}
450
451static void _traceback_rec(struct vdecoder *dec,
452 unsigned state, uint8_t *out, int len)
453{
454 int i;
455 unsigned path;
456
457 for (i = len - 1; i >= 0; i--) {
458 path = dec->paths[i][state] + 1;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700459 out[i] = path ^ dec->trellis.vals[state];
Tom Tsou35536802016-11-24 19:24:32 +0700460 state = vstate_lshift(state, dec->k, path);
461 }
462}
463
464/* Traceback and generate decoded output
465 * Find the largest accumulated path metric at the final state except for
466 * the zero terminated case, where we assume the final state is always zero.
467 */
468static int traceback(struct vdecoder *dec, uint8_t *out, int term, int len)
469{
470 int i, sum, max = -1;
471 unsigned path, state = 0;
472
473 if (term != CONV_TERM_FLUSH) {
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700474 for (i = 0; i < dec->trellis.num_states; i++) {
475 sum = dec->trellis.sums[i];
Tom Tsou35536802016-11-24 19:24:32 +0700476 if (sum > max) {
477 max = sum;
478 state = i;
479 }
480 }
481
482 if (max < 0)
483 return -EPROTO;
484 }
485
486 for (i = dec->len - 1; i >= len; i--) {
487 path = dec->paths[i][state] + 1;
488 state = vstate_lshift(state, dec->k, path);
489 }
490
491 if (dec->recursive)
492 _traceback_rec(dec, state, out, len);
493 else
494 _traceback(dec, state, out, len);
495
496 return 0;
497}
498
499/* Release decoder object */
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700500static void vdec_deinit(struct vdecoder *dec)
Tom Tsou35536802016-11-24 19:24:32 +0700501{
502 if (!dec)
503 return;
504
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700505 free_trellis(&dec->trellis);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700506
507 if (dec->paths != NULL) {
508 vdec_free(dec->paths[0]);
509 free(dec->paths);
510 }
Tom Tsou35536802016-11-24 19:24:32 +0700511}
512
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700513/* Initialize decoder object with code specific params
Tom Tsou35536802016-11-24 19:24:32 +0700514 * Subtract the constraint length K on the normalization interval to
515 * accommodate the initialization path metric at state zero.
516 */
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700517static int vdec_init(struct vdecoder *dec, const struct osmo_conv_code *code)
Tom Tsou35536802016-11-24 19:24:32 +0700518{
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700519 int i, ns, rc;
Tom Tsou35536802016-11-24 19:24:32 +0700520
521 ns = NUM_STATES(code->K);
522
Tom Tsou35536802016-11-24 19:24:32 +0700523 dec->n = code->N;
524 dec->k = code->K;
525 dec->recursive = conv_code_recursive(code);
526 dec->intrvl = INT16_MAX / (dec->n * INT8_MAX) - dec->k;
527
528 if (dec->k == 5) {
529 switch (dec->n) {
530 case 2:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700531 dec->metric_func = osmo_conv_metrics_k5_n2;
Tom Tsou35536802016-11-24 19:24:32 +0700532 break;
533 case 3:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700534 dec->metric_func = osmo_conv_metrics_k5_n3;
Tom Tsou35536802016-11-24 19:24:32 +0700535 break;
536 case 4:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700537 dec->metric_func = osmo_conv_metrics_k5_n4;
Tom Tsou35536802016-11-24 19:24:32 +0700538 break;
539 default:
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700540 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700541 }
542 } else if (dec->k == 7) {
543 switch (dec->n) {
544 case 2:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700545 dec->metric_func = osmo_conv_metrics_k7_n2;
Tom Tsou35536802016-11-24 19:24:32 +0700546 break;
547 case 3:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700548 dec->metric_func = osmo_conv_metrics_k7_n3;
Tom Tsou35536802016-11-24 19:24:32 +0700549 break;
550 case 4:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700551 dec->metric_func = osmo_conv_metrics_k7_n4;
Tom Tsou35536802016-11-24 19:24:32 +0700552 break;
553 default:
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700554 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700555 }
556 } else {
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700557 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700558 }
559
560 if (code->term == CONV_TERM_FLUSH)
561 dec->len = code->len + code->K - 1;
562 else
563 dec->len = code->len;
564
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700565 rc = generate_trellis(dec, code);
566 if (rc)
567 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700568
569 dec->paths = (int16_t **) malloc(sizeof(int16_t *) * dec->len);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700570 if (!dec->paths)
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700571 goto enomem;
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700572
Tom Tsou35536802016-11-24 19:24:32 +0700573 dec->paths[0] = vdec_malloc(ns * dec->len);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700574 if (!dec->paths[0])
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700575 goto enomem;
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700576
Tom Tsou35536802016-11-24 19:24:32 +0700577 for (i = 1; i < dec->len; i++)
578 dec->paths[i] = &dec->paths[0][i * ns];
579
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700580 return 0;
Tom Tsou35536802016-11-24 19:24:32 +0700581
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700582enomem:
583 vdec_deinit(dec);
584 return -ENOMEM;
Tom Tsou35536802016-11-24 19:24:32 +0700585}
586
587/* Depuncture sequence with nagative value terminated puncturing matrix */
588static int depuncture(const int8_t *in, const int *punc, int8_t *out, int len)
589{
590 int i, n = 0, m = 0;
591
592 for (i = 0; i < len; i++) {
593 if (i == punc[n]) {
594 out[i] = 0;
595 n++;
596 continue;
597 }
598
599 out[i] = in[m++];
600 }
601
602 return 0;
603}
604
605/* Forward trellis recursion
606 * Generate branch metrics and path metrics with a combined function. Only
607 * accumulated path metric sums and path selections are stored. Normalize on
608 * the interval specified by the decoder.
609 */
610static void forward_traverse(struct vdecoder *dec, const int8_t *seq)
611{
Tom Tsou35536802016-11-24 19:24:32 +0700612 int i;
613
614 for (i = 0; i < dec->len; i++) {
615 dec->metric_func(&seq[dec->n * i],
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700616 dec->trellis.outputs,
617 dec->trellis.sums,
Tom Tsou35536802016-11-24 19:24:32 +0700618 dec->paths[i],
619 !(i % dec->intrvl));
620 }
621}
622
623/* Convolutional decode with a decoder object
624 * Initial puncturing run if necessary followed by the forward recursion.
625 * For tail-biting perform a second pass before running the backward
626 * traceback operation.
627 */
628static int conv_decode(struct vdecoder *dec, const int8_t *seq,
629 const int *punc, uint8_t *out, int len, int term)
630{
631 int8_t depunc[dec->len * dec->n];
632
Tom Tsou35536802016-11-24 19:24:32 +0700633 if (punc) {
634 depuncture(seq, punc, depunc, dec->len * dec->n);
635 seq = depunc;
636 }
637
638 /* Propagate through the trellis with interval normalization */
639 forward_traverse(dec, seq);
640
641 if (term == CONV_TERM_TAIL_BITING)
642 forward_traverse(dec, seq);
643
644 return traceback(dec, out, term, len);
645}
646
Tom Tsou34e228a2017-04-29 00:16:43 +0700647static void osmo_conv_init(void)
648{
649 init_complete = 1;
650
651#ifdef HAVE___BUILTIN_CPU_SUPPORTS
652 /* Detect CPU capabilities */
653 #ifdef HAVE_AVX2
654 avx2_supported = __builtin_cpu_supports("avx2");
655 #endif
656
Harald Welteb93f60f2017-11-17 11:41:34 +0100657 #ifdef HAVE_SSSE3
658 ssse3_supported = __builtin_cpu_supports("ssse3");
Tom Tsou34e228a2017-04-29 00:16:43 +0700659 #endif
660
661 #ifdef HAVE_SSE4_1
662 sse41_supported = __builtin_cpu_supports("sse4.1");
663 #endif
664#endif
665
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700666/**
667 * Usage of curly braces is mandatory,
668 * because we use multi-line define.
669 */
Harald Welteb93f60f2017-11-17 11:41:34 +0100670#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
671 if (ssse3_supported && avx2_supported) {
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700672 INIT_POINTERS(sse_avx);
Harald Welteb93f60f2017-11-17 11:41:34 +0100673 } else if (ssse3_supported) {
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700674 INIT_POINTERS(sse);
675 } else {
676 INIT_POINTERS(gen);
677 }
Harald Welteb93f60f2017-11-17 11:41:34 +0100678#elif defined(HAVE_SSSE3)
679 if (ssse3_supported) {
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700680 INIT_POINTERS(sse);
681 } else {
682 INIT_POINTERS(gen);
683 }
Tom Tsou34e228a2017-04-29 00:16:43 +0700684#else
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700685 INIT_POINTERS(gen);
Tom Tsou34e228a2017-04-29 00:16:43 +0700686#endif
687}
688
Tom Tsou35536802016-11-24 19:24:32 +0700689/* All-in-one Viterbi decoding */
690int osmo_conv_decode_acc(const struct osmo_conv_code *code,
691 const sbit_t *input, ubit_t *output)
692{
693 int rc;
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700694 struct vdecoder dec;
Tom Tsou35536802016-11-24 19:24:32 +0700695
Tom Tsou34e228a2017-04-29 00:16:43 +0700696 if (!init_complete)
697 osmo_conv_init();
698
Tom Tsou35536802016-11-24 19:24:32 +0700699 if ((code->N < 2) || (code->N > 4) || (code->len < 1) ||
700 ((code->K != 5) && (code->K != 7)))
701 return -EINVAL;
702
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700703 rc = vdec_init(&dec, code);
704 if (rc)
705 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700706
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700707 rc = conv_decode(&dec, input, code->puncture,
Tom Tsou35536802016-11-24 19:24:32 +0700708 output, code->len, code->term);
709
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700710 vdec_deinit(&dec);
Tom Tsou35536802016-11-24 19:24:32 +0700711
712 return rc;
713}