blob: 6042668552b1530401e95d07389c2635dc8820e4 [file] [log] [blame]
Tom Tsou35536802016-11-24 19:24:32 +07001/*
2 * Viterbi decoder
3 *
4 * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
5 *
6 * All Rights Reserved
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
Tom Tsou34e228a2017-04-29 00:16:43 +070023#include <stdlib.h>
Tom Tsou35536802016-11-24 19:24:32 +070024#include <stdint.h>
25#include <string.h>
26
27/* Add-Compare-Select (ACS-Butterfly)
28 * Compute 4 accumulated path metrics and 4 path selections. Note that path
29 * selections are store as -1 and 0 rather than 0 and 1. This is to match
30 * the output format of the SSE packed compare instruction 'pmaxuw'.
31 */
32
33static void acs_butterfly(int state, int num_states,
34 int16_t metric, int16_t *sum,
35 int16_t *new_sum, int16_t *path)
36{
37 int state0, state1;
38 int sum0, sum1, sum2, sum3;
39
40 state0 = *(sum + (2 * state + 0));
41 state1 = *(sum + (2 * state + 1));
42
43 sum0 = state0 + metric;
44 sum1 = state1 - metric;
45 sum2 = state0 - metric;
46 sum3 = state1 + metric;
47
48 if (sum0 >= sum1) {
49 *new_sum = sum0;
50 *path = -1;
51 } else {
52 *new_sum = sum1;
53 *path = 0;
54 }
55
56 if (sum2 >= sum3) {
57 *(new_sum + num_states / 2) = sum2;
58 *(path + num_states / 2) = -1;
59 } else {
60 *(new_sum + num_states / 2) = sum3;
61 *(path + num_states / 2) = 0;
62 }
63}
64
65/* Branch metrics unit N=2 */
66static void gen_branch_metrics_n2(int num_states, const int8_t *seq,
67 const int16_t *out, int16_t *metrics)
68{
69 int i;
70
71 for (i = 0; i < num_states / 2; i++) {
72 metrics[i] = seq[0] * out[2 * i + 0] +
73 seq[1] * out[2 * i + 1];
74 }
75}
76
77/* Branch metrics unit N=3 */
78static void gen_branch_metrics_n3(int num_states, const int8_t *seq,
79 const int16_t *out, int16_t *metrics)
80{
81 int i;
82
83 for (i = 0; i < num_states / 2; i++) {
84 metrics[i] = seq[0] * out[4 * i + 0] +
85 seq[1] * out[4 * i + 1] +
86 seq[2] * out[4 * i + 2];
87 }
88}
89
90/* Branch metrics unit N=4 */
91static void gen_branch_metrics_n4(int num_states, const int8_t *seq,
92 const int16_t *out, int16_t *metrics)
93{
94 int i;
95
96 for (i = 0; i < num_states / 2; i++) {
97 metrics[i] = seq[0] * out[4 * i + 0] +
98 seq[1] * out[4 * i + 1] +
99 seq[2] * out[4 * i + 2] +
100 seq[3] * out[4 * i + 3];
101 }
102}
103
104/* Path metric unit */
105static void gen_path_metrics(int num_states, int16_t *sums,
106 int16_t *metrics, int16_t *paths, int norm)
107{
108 int i;
109 int16_t min;
110 int16_t new_sums[num_states];
111
112 for (i = 0; i < num_states / 2; i++)
113 acs_butterfly(i, num_states, metrics[i],
114 sums, &new_sums[i], &paths[i]);
115
116 if (norm) {
117 min = new_sums[0];
118
119 for (i = 1; i < num_states; i++)
120 if (new_sums[i] < min)
121 min = new_sums[i];
122
123 for (i = 0; i < num_states; i++)
124 new_sums[i] -= min;
125 }
126
127 memcpy(sums, new_sums, num_states * sizeof(int16_t));
128}
129
Tom Tsou34e228a2017-04-29 00:16:43 +0700130/* Not-aligned Memory Allocator */
131__attribute__ ((visibility("hidden")))
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700132int16_t *osmo_conv_gen_vdec_malloc(size_t n)
Tom Tsou34e228a2017-04-29 00:16:43 +0700133{
134 return (int16_t *) malloc(sizeof(int16_t) * n);
135}
136
137__attribute__ ((visibility("hidden")))
Vadim Yanitskiy0d49f472017-05-28 18:20:02 +0700138void osmo_conv_gen_vdec_free(int16_t *ptr)
Tom Tsou34e228a2017-04-29 00:16:43 +0700139{
140 free(ptr);
141}
142
Tom Tsou35536802016-11-24 19:24:32 +0700143/* 16-state branch-path metrics units (K=5) */
144__attribute__ ((visibility("hidden")))
145void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
146 int16_t *sums, int16_t *paths, int norm)
147{
148 int16_t metrics[8];
149
150 gen_branch_metrics_n2(16, seq, out, metrics);
151 gen_path_metrics(16, sums, metrics, paths, norm);
152}
153
154__attribute__ ((visibility("hidden")))
155void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
156 int16_t *sums, int16_t *paths, int norm)
157{
158 int16_t metrics[8];
159
160 gen_branch_metrics_n3(16, seq, out, metrics);
161 gen_path_metrics(16, sums, metrics, paths, norm);
162
163}
164
165__attribute__ ((visibility("hidden")))
166void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
167 int16_t *sums, int16_t *paths, int norm)
168{
169 int16_t metrics[8];
170
171 gen_branch_metrics_n4(16, seq, out, metrics);
172 gen_path_metrics(16, sums, metrics, paths, norm);
173
174}
175
176/* 64-state branch-path metrics units (K=7) */
177__attribute__ ((visibility("hidden")))
178void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
179 int16_t *sums, int16_t *paths, int norm)
180{
181 int16_t metrics[32];
182
183 gen_branch_metrics_n2(64, seq, out, metrics);
184 gen_path_metrics(64, sums, metrics, paths, norm);
185
186}
187
188__attribute__ ((visibility("hidden")))
189void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
190 int16_t *sums, int16_t *paths, int norm)
191{
192 int16_t metrics[32];
193
194 gen_branch_metrics_n3(64, seq, out, metrics);
195 gen_path_metrics(64, sums, metrics, paths, norm);
196
197}
198
199__attribute__ ((visibility("hidden")))
200void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
201 int16_t *sums, int16_t *paths, int norm)
202{
203 int16_t metrics[32];
204
205 gen_branch_metrics_n4(64, seq, out, metrics);
206 gen_path_metrics(64, sums, metrics, paths, norm);
207}