blob: 048bbb1c35b0c958ff494f0dd56eb58a2ffc9837 [file] [log] [blame]
Neels Hofmeyr17518fe2017-06-20 04:35:06 +02001/*! \file conv_acc.c
2 * Accelerated Viterbi decoder implementation. */
Tom Tsou35536802016-11-24 19:24:32 +07003/*
Tom Tsou35536802016-11-24 19:24:32 +07004 * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
5 *
6 * All Rights Reserved
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23#include <stdlib.h>
24#include <string.h>
25#include <errno.h>
26
Tom Tsou35536802016-11-24 19:24:32 +070027#include "config.h"
28
Tom Tsou34e228a2017-04-29 00:16:43 +070029#include <osmocom/core/conv.h>
30
Tom Tsou35536802016-11-24 19:24:32 +070031#define BIT2NRZ(REG,N) (((REG >> N) & 0x01) * 2 - 1) * -1
32#define NUM_STATES(K) (K == 7 ? 64 : 16)
Tom Tsou35536802016-11-24 19:24:32 +070033
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070034#define INIT_POINTERS(simd) \
35{ \
36 osmo_conv_metrics_k5_n2 = osmo_conv_##simd##_metrics_k5_n2; \
37 osmo_conv_metrics_k5_n3 = osmo_conv_##simd##_metrics_k5_n3; \
38 osmo_conv_metrics_k5_n4 = osmo_conv_##simd##_metrics_k5_n4; \
39 osmo_conv_metrics_k7_n2 = osmo_conv_##simd##_metrics_k7_n2; \
40 osmo_conv_metrics_k7_n3 = osmo_conv_##simd##_metrics_k7_n3; \
41 osmo_conv_metrics_k7_n4 = osmo_conv_##simd##_metrics_k7_n4; \
42 vdec_malloc = &osmo_conv_##simd##_vdec_malloc; \
43 vdec_free = &osmo_conv_##simd##_vdec_free; \
44}
45
Tom Tsou34e228a2017-04-29 00:16:43 +070046static int init_complete = 0;
47
48__attribute__ ((visibility("hidden"))) int avx2_supported = 0;
49__attribute__ ((visibility("hidden"))) int sse3_supported = 0;
50__attribute__ ((visibility("hidden"))) int sse41_supported = 0;
51
52/**
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070053 * These pointers are being initialized at runtime by the
54 * osmo_conv_init() depending on supported SIMD extensions.
Tom Tsou34e228a2017-04-29 00:16:43 +070055 */
56static int16_t *(*vdec_malloc)(size_t n);
57static void (*vdec_free)(int16_t *ptr);
58
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070059void (*osmo_conv_metrics_k5_n2)(const int8_t *seq,
60 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
61void (*osmo_conv_metrics_k5_n3)(const int8_t *seq,
62 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
63void (*osmo_conv_metrics_k5_n4)(const int8_t *seq,
64 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
65void (*osmo_conv_metrics_k7_n2)(const int8_t *seq,
66 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
67void (*osmo_conv_metrics_k7_n3)(const int8_t *seq,
68 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
69void (*osmo_conv_metrics_k7_n4)(const int8_t *seq,
70 const int16_t *out, int16_t *sums, int16_t *paths, int norm);
Tom Tsou34e228a2017-04-29 00:16:43 +070071
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +070072/* Forward malloc wrappers */
73int16_t *osmo_conv_gen_vdec_malloc(size_t n);
74void osmo_conv_gen_vdec_free(int16_t *ptr);
75
76#if defined(HAVE_SSE3)
77int16_t *osmo_conv_sse_vdec_malloc(size_t n);
78void osmo_conv_sse_vdec_free(int16_t *ptr);
79#endif
80
81#if defined(HAVE_SSE3) && defined(HAVE_AVX2)
82int16_t *osmo_conv_sse_avx_vdec_malloc(size_t n);
83void osmo_conv_sse_avx_vdec_free(int16_t *ptr);
Tom Tsou34e228a2017-04-29 00:16:43 +070084#endif
85
Tom Tsou35536802016-11-24 19:24:32 +070086/* Forward Metric Units */
87void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
88 int16_t *sums, int16_t *paths, int norm);
89void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
90 int16_t *sums, int16_t *paths, int norm);
91void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
92 int16_t *sums, int16_t *paths, int norm);
93void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
94 int16_t *sums, int16_t *paths, int norm);
95void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
96 int16_t *sums, int16_t *paths, int norm);
97void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
98 int16_t *sums, int16_t *paths, int norm);
99
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700100#if defined(HAVE_SSE3)
101void osmo_conv_sse_metrics_k5_n2(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700102 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700103void osmo_conv_sse_metrics_k5_n3(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700104 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700105void osmo_conv_sse_metrics_k5_n4(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700106 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700107void osmo_conv_sse_metrics_k7_n2(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700108 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700109void osmo_conv_sse_metrics_k7_n3(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700110 int16_t *sums, int16_t *paths, int norm);
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700111void osmo_conv_sse_metrics_k7_n4(const int8_t *seq, const int16_t *out,
112 int16_t *sums, int16_t *paths, int norm);
113#endif
114
115#if defined(HAVE_SSE3) && defined(HAVE_AVX2)
116void osmo_conv_sse_avx_metrics_k5_n2(const int8_t *seq, const int16_t *out,
117 int16_t *sums, int16_t *paths, int norm);
118void osmo_conv_sse_avx_metrics_k5_n3(const int8_t *seq, const int16_t *out,
119 int16_t *sums, int16_t *paths, int norm);
120void osmo_conv_sse_avx_metrics_k5_n4(const int8_t *seq, const int16_t *out,
121 int16_t *sums, int16_t *paths, int norm);
122void osmo_conv_sse_avx_metrics_k7_n2(const int8_t *seq, const int16_t *out,
123 int16_t *sums, int16_t *paths, int norm);
124void osmo_conv_sse_avx_metrics_k7_n3(const int8_t *seq, const int16_t *out,
125 int16_t *sums, int16_t *paths, int norm);
126void osmo_conv_sse_avx_metrics_k7_n4(const int8_t *seq, const int16_t *out,
Tom Tsou34e228a2017-04-29 00:16:43 +0700127 int16_t *sums, int16_t *paths, int norm);
128#endif
129
Tom Tsou35536802016-11-24 19:24:32 +0700130/* Trellis State
131 * state - Internal lshift register value
132 * prev - Register values of previous 0 and 1 states
133 */
134struct vstate {
135 unsigned state;
136 unsigned prev[2];
137};
138
139/* Trellis Object
140 * num_states - Number of states in the trellis
141 * sums - Accumulated path metrics
142 * outputs - Trellis output values
143 * vals - Input value that led to each state
144 */
145struct vtrellis {
146 int num_states;
147 int16_t *sums;
148 int16_t *outputs;
149 uint8_t *vals;
150};
151
152/* Viterbi Decoder
153 * n - Code order
154 * k - Constraint length
155 * len - Horizontal length of trellis
156 * recursive - Set to '1' if the code is recursive
157 * intrvl - Normalization interval
158 * trellis - Trellis object
Tom Tsou35536802016-11-24 19:24:32 +0700159 * paths - Trellis paths
160 */
161struct vdecoder {
162 int n;
163 int k;
164 int len;
165 int recursive;
166 int intrvl;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700167 struct vtrellis trellis;
Tom Tsou35536802016-11-24 19:24:32 +0700168 int16_t **paths;
169
170 void (*metric_func)(const int8_t *, const int16_t *,
171 int16_t *, int16_t *, int);
172};
173
Tom Tsou35536802016-11-24 19:24:32 +0700174/* Accessor calls */
175static inline int conv_code_recursive(const struct osmo_conv_code *code)
176{
177 return code->next_term_output ? 1 : 0;
178}
179
180/* Left shift and mask for finding the previous state */
181static unsigned vstate_lshift(unsigned reg, int k, int val)
182{
183 unsigned mask;
184
185 if (k == 5)
186 mask = 0x0e;
187 else if (k == 7)
188 mask = 0x3e;
189 else
190 mask = 0;
191
192 return ((reg << 1) & mask) | val;
193}
194
195/* Bit endian manipulators */
196static inline unsigned bitswap2(unsigned v)
197{
198 return ((v & 0x02) >> 1) | ((v & 0x01) << 1);
199}
200
201static inline unsigned bitswap3(unsigned v)
202{
203 return ((v & 0x04) >> 2) | ((v & 0x02) >> 0) |
204 ((v & 0x01) << 2);
205}
206
207static inline unsigned bitswap4(unsigned v)
208{
209 return ((v & 0x08) >> 3) | ((v & 0x04) >> 1) |
210 ((v & 0x02) << 1) | ((v & 0x01) << 3);
211}
212
213static inline unsigned bitswap5(unsigned v)
214{
215 return ((v & 0x10) >> 4) | ((v & 0x08) >> 2) | ((v & 0x04) >> 0) |
216 ((v & 0x02) << 2) | ((v & 0x01) << 4);
217}
218
219static inline unsigned bitswap6(unsigned v)
220{
221 return ((v & 0x20) >> 5) | ((v & 0x10) >> 3) | ((v & 0x08) >> 1) |
222 ((v & 0x04) << 1) | ((v & 0x02) << 3) | ((v & 0x01) << 5);
223}
224
225static unsigned bitswap(unsigned v, unsigned n)
226{
227 switch (n) {
228 case 1:
229 return v;
230 case 2:
231 return bitswap2(v);
232 case 3:
233 return bitswap3(v);
234 case 4:
235 return bitswap4(v);
236 case 5:
237 return bitswap5(v);
238 case 6:
239 return bitswap6(v);
240 default:
241 return 0;
242 }
243}
244
245/* Generate non-recursive state output from generator state table
246 * Note that the shift register moves right (i.e. the most recent bit is
247 * shifted into the register at k-1 bit of the register), which is typical
248 * textbook representation. The API transition table expects the most recent
249 * bit in the low order bit, or left shift. A bitswap operation is required
250 * to accommodate the difference.
251 */
252static unsigned gen_output(struct vstate *state, int val,
253 const struct osmo_conv_code *code)
254{
255 unsigned out, prev;
256
257 prev = bitswap(state->prev[0], code->K - 1);
258 out = code->next_output[prev][val];
259 out = bitswap(out, code->N);
260
261 return out;
262}
263
264/* Populate non-recursive trellis state
265 * For a given state defined by the k-1 length shift register, find the
266 * value of the input bit that drove the trellis to that state. Also
267 * generate the N outputs of the generator polynomial at that state.
268 */
269static int gen_state_info(uint8_t *val, unsigned reg,
270 int16_t *output, const struct osmo_conv_code *code)
271{
272 int i;
273 unsigned out;
274 struct vstate state;
275
276 /* Previous '0' state */
277 state.state = reg;
278 state.prev[0] = vstate_lshift(reg, code->K, 0);
279 state.prev[1] = vstate_lshift(reg, code->K, 1);
280
281 *val = (reg >> (code->K - 2)) & 0x01;
282
283 /* Transition output */
284 out = gen_output(&state, *val, code);
285
286 /* Unpack to NRZ */
287 for (i = 0; i < code->N; i++)
288 output[i] = BIT2NRZ(out, i);
289
290 return 0;
291}
292
293/* Generate recursive state output from generator state table */
294static unsigned gen_recursive_output(struct vstate *state,
295 uint8_t *val, unsigned reg,
296 const struct osmo_conv_code *code, int pos)
297{
298 int val0, val1;
299 unsigned out, prev;
300
301 /* Previous '0' state */
302 prev = vstate_lshift(reg, code->K, 0);
303 prev = bitswap(prev, code->K - 1);
304
305 /* Input value */
306 val0 = (reg >> (code->K - 2)) & 0x01;
307 val1 = (code->next_term_output[prev] >> pos) & 0x01;
308 *val = val0 == val1 ? 0 : 1;
309
310 /* Wrapper for osmocom state access */
311 prev = bitswap(state->prev[0], code->K - 1);
312
313 /* Compute the transition output */
314 out = code->next_output[prev][*val];
315 out = bitswap(out, code->N);
316
317 return out;
318}
319
320/* Populate recursive trellis state
321 * The bit position of the systematic bit is not explicitly marked by the
322 * API, so it must be extracted from the generator table. Otherwise,
323 * populate the trellis similar to the non-recursive version.
324 * Non-systematic recursive codes are not supported.
325 */
326static int gen_recursive_state_info(uint8_t *val,
327 unsigned reg, int16_t *output, const struct osmo_conv_code *code)
328{
329 int i, j, pos = -1;
330 int ns = NUM_STATES(code->K);
331 unsigned out;
332 struct vstate state;
333
334 /* Previous '0' and '1' states */
335 state.state = reg;
336 state.prev[0] = vstate_lshift(reg, code->K, 0);
337 state.prev[1] = vstate_lshift(reg, code->K, 1);
338
339 /* Find recursive bit location */
340 for (i = 0; i < code->N; i++) {
341 for (j = 0; j < ns; j++) {
342 if ((code->next_output[j][0] >> i) & 0x01)
343 break;
344 }
345
346 if (j == ns) {
347 pos = i;
348 break;
349 }
350 }
351
352 /* Non-systematic recursive code not supported */
353 if (pos < 0)
354 return -EPROTO;
355
356 /* Transition output */
357 out = gen_recursive_output(&state, val, reg, code, pos);
358
359 /* Unpack to NRZ */
360 for (i = 0; i < code->N; i++)
361 output[i] = BIT2NRZ(out, i);
362
363 return 0;
364}
365
366/* Release the trellis */
367static void free_trellis(struct vtrellis *trellis)
368{
369 if (!trellis)
370 return;
371
Tom Tsou34e228a2017-04-29 00:16:43 +0700372 vdec_free(trellis->outputs);
373 vdec_free(trellis->sums);
Tom Tsou35536802016-11-24 19:24:32 +0700374 free(trellis->vals);
Tom Tsou35536802016-11-24 19:24:32 +0700375}
376
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700377/* Initialize the trellis object
Tom Tsou35536802016-11-24 19:24:32 +0700378 * Initialization consists of generating the outputs and output value of a
379 * given state. Due to trellis symmetry and anti-symmetry, only one of the
380 * transition paths is utilized by the butterfly operation in the forward
381 * recursion, so only one set of N outputs is required per state variable.
382 */
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700383static int generate_trellis(struct vdecoder *dec,
384 const struct osmo_conv_code *code)
Tom Tsou35536802016-11-24 19:24:32 +0700385{
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700386 struct vtrellis *trellis = &dec->trellis;
Tom Tsou35536802016-11-24 19:24:32 +0700387 int16_t *outputs;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700388 int i, rc;
Tom Tsou35536802016-11-24 19:24:32 +0700389
390 int ns = NUM_STATES(code->K);
Tom Tsou35536802016-11-24 19:24:32 +0700391 int olen = (code->N == 2) ? 2 : 4;
392
Tom Tsou35536802016-11-24 19:24:32 +0700393 trellis->num_states = ns;
394 trellis->sums = vdec_malloc(ns);
395 trellis->outputs = vdec_malloc(ns * olen);
396 trellis->vals = (uint8_t *) malloc(ns * sizeof(uint8_t));
397
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700398 if (!trellis->sums || !trellis->outputs || !trellis->vals) {
399 rc = -ENOMEM;
Tom Tsou35536802016-11-24 19:24:32 +0700400 goto fail;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700401 }
Tom Tsou35536802016-11-24 19:24:32 +0700402
403 /* Populate the trellis state objects */
404 for (i = 0; i < ns; i++) {
405 outputs = &trellis->outputs[olen * i];
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700406 if (dec->recursive) {
Tom Tsou35536802016-11-24 19:24:32 +0700407 rc = gen_recursive_state_info(&trellis->vals[i],
408 i, outputs, code);
409 } else {
410 rc = gen_state_info(&trellis->vals[i],
411 i, outputs, code);
412 }
Vadim Yanitskiy34da6b52017-06-19 05:41:49 +0700413
414 if (rc < 0)
415 goto fail;
416
417 /* Set accumulated path metrics to zero */
418 trellis->sums[i] = 0;
Tom Tsou35536802016-11-24 19:24:32 +0700419 }
420
Vadim Yanitskiy34da6b52017-06-19 05:41:49 +0700421 /**
422 * For termination other than tail-biting, initialize the zero state
423 * as the encoder starting state. Initialize with the maximum
424 * accumulated sum at length equal to the constraint length.
425 */
426 if (code->term != CONV_TERM_TAIL_BITING)
427 trellis->sums[0] = INT8_MAX * code->N * code->K;
Tom Tsou35536802016-11-24 19:24:32 +0700428
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700429 return 0;
Tom Tsou35536802016-11-24 19:24:32 +0700430
431fail:
432 free_trellis(trellis);
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700433 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700434}
435
Tom Tsou35536802016-11-24 19:24:32 +0700436static void _traceback(struct vdecoder *dec,
437 unsigned state, uint8_t *out, int len)
438{
439 int i;
440 unsigned path;
441
442 for (i = len - 1; i >= 0; i--) {
443 path = dec->paths[i][state] + 1;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700444 out[i] = dec->trellis.vals[state];
Tom Tsou35536802016-11-24 19:24:32 +0700445 state = vstate_lshift(state, dec->k, path);
446 }
447}
448
449static void _traceback_rec(struct vdecoder *dec,
450 unsigned state, uint8_t *out, int len)
451{
452 int i;
453 unsigned path;
454
455 for (i = len - 1; i >= 0; i--) {
456 path = dec->paths[i][state] + 1;
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700457 out[i] = path ^ dec->trellis.vals[state];
Tom Tsou35536802016-11-24 19:24:32 +0700458 state = vstate_lshift(state, dec->k, path);
459 }
460}
461
462/* Traceback and generate decoded output
463 * Find the largest accumulated path metric at the final state except for
464 * the zero terminated case, where we assume the final state is always zero.
465 */
466static int traceback(struct vdecoder *dec, uint8_t *out, int term, int len)
467{
468 int i, sum, max = -1;
469 unsigned path, state = 0;
470
471 if (term != CONV_TERM_FLUSH) {
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700472 for (i = 0; i < dec->trellis.num_states; i++) {
473 sum = dec->trellis.sums[i];
Tom Tsou35536802016-11-24 19:24:32 +0700474 if (sum > max) {
475 max = sum;
476 state = i;
477 }
478 }
479
480 if (max < 0)
481 return -EPROTO;
482 }
483
484 for (i = dec->len - 1; i >= len; i--) {
485 path = dec->paths[i][state] + 1;
486 state = vstate_lshift(state, dec->k, path);
487 }
488
489 if (dec->recursive)
490 _traceback_rec(dec, state, out, len);
491 else
492 _traceback(dec, state, out, len);
493
494 return 0;
495}
496
497/* Release decoder object */
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700498static void vdec_deinit(struct vdecoder *dec)
Tom Tsou35536802016-11-24 19:24:32 +0700499{
500 if (!dec)
501 return;
502
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700503 free_trellis(&dec->trellis);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700504
505 if (dec->paths != NULL) {
506 vdec_free(dec->paths[0]);
507 free(dec->paths);
508 }
Tom Tsou35536802016-11-24 19:24:32 +0700509}
510
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700511/* Initialize decoder object with code specific params
Tom Tsou35536802016-11-24 19:24:32 +0700512 * Subtract the constraint length K on the normalization interval to
513 * accommodate the initialization path metric at state zero.
514 */
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700515static int vdec_init(struct vdecoder *dec, const struct osmo_conv_code *code)
Tom Tsou35536802016-11-24 19:24:32 +0700516{
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700517 int i, ns, rc;
Tom Tsou35536802016-11-24 19:24:32 +0700518
519 ns = NUM_STATES(code->K);
520
Tom Tsou35536802016-11-24 19:24:32 +0700521 dec->n = code->N;
522 dec->k = code->K;
523 dec->recursive = conv_code_recursive(code);
524 dec->intrvl = INT16_MAX / (dec->n * INT8_MAX) - dec->k;
525
526 if (dec->k == 5) {
527 switch (dec->n) {
528 case 2:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700529 dec->metric_func = osmo_conv_metrics_k5_n2;
Tom Tsou35536802016-11-24 19:24:32 +0700530 break;
531 case 3:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700532 dec->metric_func = osmo_conv_metrics_k5_n3;
Tom Tsou35536802016-11-24 19:24:32 +0700533 break;
534 case 4:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700535 dec->metric_func = osmo_conv_metrics_k5_n4;
Tom Tsou35536802016-11-24 19:24:32 +0700536 break;
537 default:
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700538 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700539 }
540 } else if (dec->k == 7) {
541 switch (dec->n) {
542 case 2:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700543 dec->metric_func = osmo_conv_metrics_k7_n2;
Tom Tsou35536802016-11-24 19:24:32 +0700544 break;
545 case 3:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700546 dec->metric_func = osmo_conv_metrics_k7_n3;
Tom Tsou35536802016-11-24 19:24:32 +0700547 break;
548 case 4:
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700549 dec->metric_func = osmo_conv_metrics_k7_n4;
Tom Tsou35536802016-11-24 19:24:32 +0700550 break;
551 default:
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700552 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700553 }
554 } else {
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700555 return -EINVAL;
Tom Tsou35536802016-11-24 19:24:32 +0700556 }
557
558 if (code->term == CONV_TERM_FLUSH)
559 dec->len = code->len + code->K - 1;
560 else
561 dec->len = code->len;
562
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700563 rc = generate_trellis(dec, code);
564 if (rc)
565 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700566
567 dec->paths = (int16_t **) malloc(sizeof(int16_t *) * dec->len);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700568 if (!dec->paths)
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700569 goto enomem;
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700570
Tom Tsou35536802016-11-24 19:24:32 +0700571 dec->paths[0] = vdec_malloc(ns * dec->len);
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700572 if (!dec->paths[0])
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700573 goto enomem;
Vadim Yanitskiycfb1eaa2017-06-12 03:13:12 +0700574
Tom Tsou35536802016-11-24 19:24:32 +0700575 for (i = 1; i < dec->len; i++)
576 dec->paths[i] = &dec->paths[0][i * ns];
577
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700578 return 0;
Tom Tsou35536802016-11-24 19:24:32 +0700579
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700580enomem:
581 vdec_deinit(dec);
582 return -ENOMEM;
Tom Tsou35536802016-11-24 19:24:32 +0700583}
584
585/* Depuncture sequence with nagative value terminated puncturing matrix */
586static int depuncture(const int8_t *in, const int *punc, int8_t *out, int len)
587{
588 int i, n = 0, m = 0;
589
590 for (i = 0; i < len; i++) {
591 if (i == punc[n]) {
592 out[i] = 0;
593 n++;
594 continue;
595 }
596
597 out[i] = in[m++];
598 }
599
600 return 0;
601}
602
603/* Forward trellis recursion
604 * Generate branch metrics and path metrics with a combined function. Only
605 * accumulated path metric sums and path selections are stored. Normalize on
606 * the interval specified by the decoder.
607 */
608static void forward_traverse(struct vdecoder *dec, const int8_t *seq)
609{
Tom Tsou35536802016-11-24 19:24:32 +0700610 int i;
611
612 for (i = 0; i < dec->len; i++) {
613 dec->metric_func(&seq[dec->n * i],
Vadim Yanitskiy0876ef82017-06-19 06:16:37 +0700614 dec->trellis.outputs,
615 dec->trellis.sums,
Tom Tsou35536802016-11-24 19:24:32 +0700616 dec->paths[i],
617 !(i % dec->intrvl));
618 }
619}
620
621/* Convolutional decode with a decoder object
622 * Initial puncturing run if necessary followed by the forward recursion.
623 * For tail-biting perform a second pass before running the backward
624 * traceback operation.
625 */
626static int conv_decode(struct vdecoder *dec, const int8_t *seq,
627 const int *punc, uint8_t *out, int len, int term)
628{
629 int8_t depunc[dec->len * dec->n];
630
Tom Tsou35536802016-11-24 19:24:32 +0700631 if (punc) {
632 depuncture(seq, punc, depunc, dec->len * dec->n);
633 seq = depunc;
634 }
635
636 /* Propagate through the trellis with interval normalization */
637 forward_traverse(dec, seq);
638
639 if (term == CONV_TERM_TAIL_BITING)
640 forward_traverse(dec, seq);
641
642 return traceback(dec, out, term, len);
643}
644
Tom Tsou34e228a2017-04-29 00:16:43 +0700645static void osmo_conv_init(void)
646{
647 init_complete = 1;
648
649#ifdef HAVE___BUILTIN_CPU_SUPPORTS
650 /* Detect CPU capabilities */
651 #ifdef HAVE_AVX2
652 avx2_supported = __builtin_cpu_supports("avx2");
653 #endif
654
655 #ifdef HAVE_SSE3
656 sse3_supported = __builtin_cpu_supports("sse3");
657 #endif
658
659 #ifdef HAVE_SSE4_1
660 sse41_supported = __builtin_cpu_supports("sse4.1");
661 #endif
662#endif
663
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700664/**
665 * Usage of curly braces is mandatory,
666 * because we use multi-line define.
667 */
668#if defined(HAVE_SSE3) && defined(HAVE_AVX2)
669 if (sse3_supported && avx2_supported) {
670 INIT_POINTERS(sse_avx);
671 } else if (sse3_supported) {
672 INIT_POINTERS(sse);
673 } else {
674 INIT_POINTERS(gen);
675 }
676#elif defined(HAVE_SSE3)
677 if (sse3_supported) {
678 INIT_POINTERS(sse);
679 } else {
680 INIT_POINTERS(gen);
681 }
Tom Tsou34e228a2017-04-29 00:16:43 +0700682#else
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700683 INIT_POINTERS(gen);
Tom Tsou34e228a2017-04-29 00:16:43 +0700684#endif
685}
686
Tom Tsou35536802016-11-24 19:24:32 +0700687/* All-in-one Viterbi decoding */
688int osmo_conv_decode_acc(const struct osmo_conv_code *code,
689 const sbit_t *input, ubit_t *output)
690{
691 int rc;
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700692 struct vdecoder dec;
Tom Tsou35536802016-11-24 19:24:32 +0700693
Tom Tsou34e228a2017-04-29 00:16:43 +0700694 if (!init_complete)
695 osmo_conv_init();
696
Tom Tsou35536802016-11-24 19:24:32 +0700697 if ((code->N < 2) || (code->N > 4) || (code->len < 1) ||
698 ((code->K != 5) && (code->K != 7)))
699 return -EINVAL;
700
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700701 rc = vdec_init(&dec, code);
702 if (rc)
703 return rc;
Tom Tsou35536802016-11-24 19:24:32 +0700704
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700705 rc = conv_decode(&dec, input, code->puncture,
Tom Tsou35536802016-11-24 19:24:32 +0700706 output, code->len, code->term);
707
Vadim Yanitskiy1ac56fb2017-06-19 06:01:30 +0700708 vdec_deinit(&dec);
Tom Tsou35536802016-11-24 19:24:32 +0700709
710 return rc;
711}