blob: ea4fb21f635bd517c97a7afea6c8d78de65bfdbf [file] [log] [blame]
Tom Tsou35536802016-11-24 19:24:32 +07001/*
2 * Viterbi decoder
3 *
4 * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
5 *
6 * All Rights Reserved
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23#include <stdlib.h>
24#include <string.h>
25#include <errno.h>
26
27#include <osmocom/core/conv.h>
28#include "config.h"
29
30#define BIT2NRZ(REG,N) (((REG >> N) & 0x01) * 2 - 1) * -1
31#define NUM_STATES(K) (K == 7 ? 64 : 16)
32#define SSE_ALIGN 16
33
34/* Forward Metric Units */
35void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
36 int16_t *sums, int16_t *paths, int norm);
37void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
38 int16_t *sums, int16_t *paths, int norm);
39void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
40 int16_t *sums, int16_t *paths, int norm);
41void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
42 int16_t *sums, int16_t *paths, int norm);
43void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
44 int16_t *sums, int16_t *paths, int norm);
45void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
46 int16_t *sums, int16_t *paths, int norm);
47
48/* Trellis State
49 * state - Internal lshift register value
50 * prev - Register values of previous 0 and 1 states
51 */
52struct vstate {
53 unsigned state;
54 unsigned prev[2];
55};
56
57/* Trellis Object
58 * num_states - Number of states in the trellis
59 * sums - Accumulated path metrics
60 * outputs - Trellis output values
61 * vals - Input value that led to each state
62 */
63struct vtrellis {
64 int num_states;
65 int16_t *sums;
66 int16_t *outputs;
67 uint8_t *vals;
68};
69
70/* Viterbi Decoder
71 * n - Code order
72 * k - Constraint length
73 * len - Horizontal length of trellis
74 * recursive - Set to '1' if the code is recursive
75 * intrvl - Normalization interval
76 * trellis - Trellis object
77 * punc - Puncturing sequence
78 * paths - Trellis paths
79 */
80struct vdecoder {
81 int n;
82 int k;
83 int len;
84 int recursive;
85 int intrvl;
86 struct vtrellis *trellis;
87 int *punc;
88 int16_t **paths;
89
90 void (*metric_func)(const int8_t *, const int16_t *,
91 int16_t *, int16_t *, int);
92};
93
94/* Aligned Memory Allocator
95 * SSE requires 16-byte memory alignment. We store relevant trellis values
96 * (accumulated sums, outputs, and path decisions) as 16 bit signed integers
97 * so the allocated memory is casted as such.
98 */
99static int16_t *vdec_malloc(size_t n)
100{
101#ifdef HAVE_SSE3
102 return (int16_t *) memalign(SSE_ALIGN, sizeof(int16_t) * n);
103#else
104 return (int16_t *) malloc(sizeof(int16_t) * n);
105#endif
106}
107
108/* Accessor calls */
109static inline int conv_code_recursive(const struct osmo_conv_code *code)
110{
111 return code->next_term_output ? 1 : 0;
112}
113
114/* Left shift and mask for finding the previous state */
115static unsigned vstate_lshift(unsigned reg, int k, int val)
116{
117 unsigned mask;
118
119 if (k == 5)
120 mask = 0x0e;
121 else if (k == 7)
122 mask = 0x3e;
123 else
124 mask = 0;
125
126 return ((reg << 1) & mask) | val;
127}
128
129/* Bit endian manipulators */
130static inline unsigned bitswap2(unsigned v)
131{
132 return ((v & 0x02) >> 1) | ((v & 0x01) << 1);
133}
134
135static inline unsigned bitswap3(unsigned v)
136{
137 return ((v & 0x04) >> 2) | ((v & 0x02) >> 0) |
138 ((v & 0x01) << 2);
139}
140
141static inline unsigned bitswap4(unsigned v)
142{
143 return ((v & 0x08) >> 3) | ((v & 0x04) >> 1) |
144 ((v & 0x02) << 1) | ((v & 0x01) << 3);
145}
146
147static inline unsigned bitswap5(unsigned v)
148{
149 return ((v & 0x10) >> 4) | ((v & 0x08) >> 2) | ((v & 0x04) >> 0) |
150 ((v & 0x02) << 2) | ((v & 0x01) << 4);
151}
152
153static inline unsigned bitswap6(unsigned v)
154{
155 return ((v & 0x20) >> 5) | ((v & 0x10) >> 3) | ((v & 0x08) >> 1) |
156 ((v & 0x04) << 1) | ((v & 0x02) << 3) | ((v & 0x01) << 5);
157}
158
159static unsigned bitswap(unsigned v, unsigned n)
160{
161 switch (n) {
162 case 1:
163 return v;
164 case 2:
165 return bitswap2(v);
166 case 3:
167 return bitswap3(v);
168 case 4:
169 return bitswap4(v);
170 case 5:
171 return bitswap5(v);
172 case 6:
173 return bitswap6(v);
174 default:
175 return 0;
176 }
177}
178
179/* Generate non-recursive state output from generator state table
180 * Note that the shift register moves right (i.e. the most recent bit is
181 * shifted into the register at k-1 bit of the register), which is typical
182 * textbook representation. The API transition table expects the most recent
183 * bit in the low order bit, or left shift. A bitswap operation is required
184 * to accommodate the difference.
185 */
186static unsigned gen_output(struct vstate *state, int val,
187 const struct osmo_conv_code *code)
188{
189 unsigned out, prev;
190
191 prev = bitswap(state->prev[0], code->K - 1);
192 out = code->next_output[prev][val];
193 out = bitswap(out, code->N);
194
195 return out;
196}
197
198/* Populate non-recursive trellis state
199 * For a given state defined by the k-1 length shift register, find the
200 * value of the input bit that drove the trellis to that state. Also
201 * generate the N outputs of the generator polynomial at that state.
202 */
203static int gen_state_info(uint8_t *val, unsigned reg,
204 int16_t *output, const struct osmo_conv_code *code)
205{
206 int i;
207 unsigned out;
208 struct vstate state;
209
210 /* Previous '0' state */
211 state.state = reg;
212 state.prev[0] = vstate_lshift(reg, code->K, 0);
213 state.prev[1] = vstate_lshift(reg, code->K, 1);
214
215 *val = (reg >> (code->K - 2)) & 0x01;
216
217 /* Transition output */
218 out = gen_output(&state, *val, code);
219
220 /* Unpack to NRZ */
221 for (i = 0; i < code->N; i++)
222 output[i] = BIT2NRZ(out, i);
223
224 return 0;
225}
226
227/* Generate recursive state output from generator state table */
228static unsigned gen_recursive_output(struct vstate *state,
229 uint8_t *val, unsigned reg,
230 const struct osmo_conv_code *code, int pos)
231{
232 int val0, val1;
233 unsigned out, prev;
234
235 /* Previous '0' state */
236 prev = vstate_lshift(reg, code->K, 0);
237 prev = bitswap(prev, code->K - 1);
238
239 /* Input value */
240 val0 = (reg >> (code->K - 2)) & 0x01;
241 val1 = (code->next_term_output[prev] >> pos) & 0x01;
242 *val = val0 == val1 ? 0 : 1;
243
244 /* Wrapper for osmocom state access */
245 prev = bitswap(state->prev[0], code->K - 1);
246
247 /* Compute the transition output */
248 out = code->next_output[prev][*val];
249 out = bitswap(out, code->N);
250
251 return out;
252}
253
254/* Populate recursive trellis state
255 * The bit position of the systematic bit is not explicitly marked by the
256 * API, so it must be extracted from the generator table. Otherwise,
257 * populate the trellis similar to the non-recursive version.
258 * Non-systematic recursive codes are not supported.
259 */
260static int gen_recursive_state_info(uint8_t *val,
261 unsigned reg, int16_t *output, const struct osmo_conv_code *code)
262{
263 int i, j, pos = -1;
264 int ns = NUM_STATES(code->K);
265 unsigned out;
266 struct vstate state;
267
268 /* Previous '0' and '1' states */
269 state.state = reg;
270 state.prev[0] = vstate_lshift(reg, code->K, 0);
271 state.prev[1] = vstate_lshift(reg, code->K, 1);
272
273 /* Find recursive bit location */
274 for (i = 0; i < code->N; i++) {
275 for (j = 0; j < ns; j++) {
276 if ((code->next_output[j][0] >> i) & 0x01)
277 break;
278 }
279
280 if (j == ns) {
281 pos = i;
282 break;
283 }
284 }
285
286 /* Non-systematic recursive code not supported */
287 if (pos < 0)
288 return -EPROTO;
289
290 /* Transition output */
291 out = gen_recursive_output(&state, val, reg, code, pos);
292
293 /* Unpack to NRZ */
294 for (i = 0; i < code->N; i++)
295 output[i] = BIT2NRZ(out, i);
296
297 return 0;
298}
299
300/* Release the trellis */
301static void free_trellis(struct vtrellis *trellis)
302{
303 if (!trellis)
304 return;
305
306 free(trellis->vals);
307 free(trellis->outputs);
308 free(trellis->sums);
309 free(trellis);
310}
311
312/* Allocate and initialize the trellis object
313 * Initialization consists of generating the outputs and output value of a
314 * given state. Due to trellis symmetry and anti-symmetry, only one of the
315 * transition paths is utilized by the butterfly operation in the forward
316 * recursion, so only one set of N outputs is required per state variable.
317 */
318static struct vtrellis *generate_trellis(const struct osmo_conv_code *code)
319{
320 int i, rc = -1;
321 struct vtrellis *trellis;
322 int16_t *outputs;
323
324 int ns = NUM_STATES(code->K);
325 int recursive = conv_code_recursive(code);
326 int olen = (code->N == 2) ? 2 : 4;
327
328 trellis = (struct vtrellis *) calloc(1, sizeof(struct vtrellis));
329 trellis->num_states = ns;
330 trellis->sums = vdec_malloc(ns);
331 trellis->outputs = vdec_malloc(ns * olen);
332 trellis->vals = (uint8_t *) malloc(ns * sizeof(uint8_t));
333
334 if (!trellis->sums || !trellis->outputs)
335 goto fail;
336
337 /* Populate the trellis state objects */
338 for (i = 0; i < ns; i++) {
339 outputs = &trellis->outputs[olen * i];
340 if (recursive) {
341 rc = gen_recursive_state_info(&trellis->vals[i],
342 i, outputs, code);
343 } else {
344 rc = gen_state_info(&trellis->vals[i],
345 i, outputs, code);
346 }
347 }
348
349 if (rc < 0)
350 goto fail;
351
352 return trellis;
353
354fail:
355 free_trellis(trellis);
356 return NULL;
357}
358
359/* Reset decoder
360 * Set accumulated path metrics to zero. For termination other than
361 * tail-biting, initialize the zero state as the encoder starting state.
362 * Initialize with the maximum accumulated sum at length equal to the
363 * constraint length.
364 */
365static void reset_decoder(struct vdecoder *dec, int term)
366{
367 int ns = dec->trellis->num_states;
368
369 memset(dec->trellis->sums, 0, sizeof(int16_t) * ns);
370
371 if (term != CONV_TERM_TAIL_BITING)
372 dec->trellis->sums[0] = INT8_MAX * dec->n * dec->k;
373}
374
375static void _traceback(struct vdecoder *dec,
376 unsigned state, uint8_t *out, int len)
377{
378 int i;
379 unsigned path;
380
381 for (i = len - 1; i >= 0; i--) {
382 path = dec->paths[i][state] + 1;
383 out[i] = dec->trellis->vals[state];
384 state = vstate_lshift(state, dec->k, path);
385 }
386}
387
388static void _traceback_rec(struct vdecoder *dec,
389 unsigned state, uint8_t *out, int len)
390{
391 int i;
392 unsigned path;
393
394 for (i = len - 1; i >= 0; i--) {
395 path = dec->paths[i][state] + 1;
396 out[i] = path ^ dec->trellis->vals[state];
397 state = vstate_lshift(state, dec->k, path);
398 }
399}
400
401/* Traceback and generate decoded output
402 * Find the largest accumulated path metric at the final state except for
403 * the zero terminated case, where we assume the final state is always zero.
404 */
405static int traceback(struct vdecoder *dec, uint8_t *out, int term, int len)
406{
407 int i, sum, max = -1;
408 unsigned path, state = 0;
409
410 if (term != CONV_TERM_FLUSH) {
411 for (i = 0; i < dec->trellis->num_states; i++) {
412 sum = dec->trellis->sums[i];
413 if (sum > max) {
414 max = sum;
415 state = i;
416 }
417 }
418
419 if (max < 0)
420 return -EPROTO;
421 }
422
423 for (i = dec->len - 1; i >= len; i--) {
424 path = dec->paths[i][state] + 1;
425 state = vstate_lshift(state, dec->k, path);
426 }
427
428 if (dec->recursive)
429 _traceback_rec(dec, state, out, len);
430 else
431 _traceback(dec, state, out, len);
432
433 return 0;
434}
435
436/* Release decoder object */
437static void free_vdec(struct vdecoder *dec)
438{
439 if (!dec)
440 return;
441
442 free(dec->paths[0]);
443 free(dec->paths);
444 free_trellis(dec->trellis);
445 free(dec);
446}
447
448/* Allocate decoder object
449 * Subtract the constraint length K on the normalization interval to
450 * accommodate the initialization path metric at state zero.
451 */
452static struct vdecoder *alloc_vdec(const struct osmo_conv_code *code)
453{
454 int i, ns;
455 struct vdecoder *dec;
456
457 ns = NUM_STATES(code->K);
458
459 dec = (struct vdecoder *) calloc(1, sizeof(struct vdecoder));
460 dec->n = code->N;
461 dec->k = code->K;
462 dec->recursive = conv_code_recursive(code);
463 dec->intrvl = INT16_MAX / (dec->n * INT8_MAX) - dec->k;
464
465 if (dec->k == 5) {
466 switch (dec->n) {
467 case 2:
468 dec->metric_func = osmo_conv_gen_metrics_k5_n2;
469 break;
470 case 3:
471 dec->metric_func = osmo_conv_gen_metrics_k5_n3;
472 break;
473 case 4:
474 dec->metric_func = osmo_conv_gen_metrics_k5_n4;
475 break;
476 default:
477 goto fail;
478 }
479 } else if (dec->k == 7) {
480 switch (dec->n) {
481 case 2:
482 dec->metric_func = osmo_conv_gen_metrics_k7_n2;
483 break;
484 case 3:
485 dec->metric_func = osmo_conv_gen_metrics_k7_n3;
486 break;
487 case 4:
488 dec->metric_func = osmo_conv_gen_metrics_k7_n4;
489 break;
490 default:
491 goto fail;
492 }
493 } else {
494 goto fail;
495 }
496
497 if (code->term == CONV_TERM_FLUSH)
498 dec->len = code->len + code->K - 1;
499 else
500 dec->len = code->len;
501
502 dec->trellis = generate_trellis(code);
503 if (!dec->trellis)
504 goto fail;
505
506 dec->paths = (int16_t **) malloc(sizeof(int16_t *) * dec->len);
507 dec->paths[0] = vdec_malloc(ns * dec->len);
508 for (i = 1; i < dec->len; i++)
509 dec->paths[i] = &dec->paths[0][i * ns];
510
511 return dec;
512
513fail:
514 free_vdec(dec);
515 return NULL;
516}
517
518/* Depuncture sequence with nagative value terminated puncturing matrix */
519static int depuncture(const int8_t *in, const int *punc, int8_t *out, int len)
520{
521 int i, n = 0, m = 0;
522
523 for (i = 0; i < len; i++) {
524 if (i == punc[n]) {
525 out[i] = 0;
526 n++;
527 continue;
528 }
529
530 out[i] = in[m++];
531 }
532
533 return 0;
534}
535
536/* Forward trellis recursion
537 * Generate branch metrics and path metrics with a combined function. Only
538 * accumulated path metric sums and path selections are stored. Normalize on
539 * the interval specified by the decoder.
540 */
541static void forward_traverse(struct vdecoder *dec, const int8_t *seq)
542{
543 struct vtrellis *trellis = dec->trellis;
544 int i;
545
546 for (i = 0; i < dec->len; i++) {
547 dec->metric_func(&seq[dec->n * i],
548 trellis->outputs,
549 trellis->sums,
550 dec->paths[i],
551 !(i % dec->intrvl));
552 }
553}
554
555/* Convolutional decode with a decoder object
556 * Initial puncturing run if necessary followed by the forward recursion.
557 * For tail-biting perform a second pass before running the backward
558 * traceback operation.
559 */
560static int conv_decode(struct vdecoder *dec, const int8_t *seq,
561 const int *punc, uint8_t *out, int len, int term)
562{
563 int8_t depunc[dec->len * dec->n];
564
565 reset_decoder(dec, term);
566
567 if (punc) {
568 depuncture(seq, punc, depunc, dec->len * dec->n);
569 seq = depunc;
570 }
571
572 /* Propagate through the trellis with interval normalization */
573 forward_traverse(dec, seq);
574
575 if (term == CONV_TERM_TAIL_BITING)
576 forward_traverse(dec, seq);
577
578 return traceback(dec, out, term, len);
579}
580
581/* All-in-one Viterbi decoding */
582int osmo_conv_decode_acc(const struct osmo_conv_code *code,
583 const sbit_t *input, ubit_t *output)
584{
585 int rc;
586 struct vdecoder *vdec;
587
588 if ((code->N < 2) || (code->N > 4) || (code->len < 1) ||
589 ((code->K != 5) && (code->K != 7)))
590 return -EINVAL;
591
592 vdec = alloc_vdec(code);
593 if (!vdec)
594 return -EFAULT;
595
596 rc = conv_decode(vdec, input, code->puncture,
597 output, code->len, code->term);
598
599 free_vdec(vdec);
600
601 return rc;
602}