blob: 228f3e281da7ca8e9373be9cc1f8b1a4f7494006 [file] [log] [blame]
Neels Hofmeyr17518fe2017-06-20 04:35:06 +02001/*! \file conv_acc_generic.c
Vadim Yanitskiy46e533c2017-06-19 18:21:02 +07002 * Accelerated Viterbi decoder implementation
Neels Hofmeyr17518fe2017-06-20 04:35:06 +02003 * for generic architectures without SSE support. */
4/*
Tom Tsou35536802016-11-24 19:24:32 +07005 * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
6 *
7 * All Rights Reserved
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 */
23
Tom Tsou34e228a2017-04-29 00:16:43 +070024#include <stdlib.h>
Tom Tsou35536802016-11-24 19:24:32 +070025#include <stdint.h>
26#include <string.h>
27
/* Add-Compare-Select (ACS) butterfly
 *
 * Reads the two accumulated metrics sum[2 * state] and sum[2 * state + 1],
 * forms the four candidate sums obtained by adding/subtracting 'metric',
 * and keeps the larger candidate of each pair:
 *
 *   new_sum[0]              = max(even + metric, odd - metric)
 *   new_sum[num_states / 2] = max(even - metric, odd + metric)
 *
 * The matching decision is written to path[0] and path[num_states / 2].
 * Decisions are stored as -1 (first candidate won, ties included) and 0
 * (second candidate won) rather than 1 and 0; per the original note this
 * mirrors the packed-compare output format used by the SSE implementation.
 *
 * Candidates are computed in 'int' and narrowed to int16_t on store; no
 * saturation is applied.
 */
static void acs_butterfly(int state, int num_states,
	int16_t metric, int16_t *sum,
	int16_t *new_sum, int16_t *path)
{
	const int half = num_states / 2;

	/* Predecessor metrics feeding this butterfly */
	const int even = sum[2 * state];
	const int odd = sum[2 * state + 1];

	/* Four candidate path metrics */
	const int even_plus = even + metric;
	const int odd_minus = odd - metric;
	const int even_minus = even - metric;
	const int odd_plus = odd + metric;

	const int first_wins = (even_plus >= odd_minus);
	const int second_wins = (even_minus >= odd_plus);

	/* Upper output of the butterfly */
	new_sum[0] = (int16_t) (first_wins ? even_plus : odd_minus);
	path[0] = (int16_t) (first_wins ? -1 : 0);

	/* Lower output, half the trellis further on */
	new_sum[half] = (int16_t) (second_wins ? even_minus : odd_plus);
	path[half] = (int16_t) (second_wins ? -1 : 0);
}
65
/* Branch metrics unit, N=2
 *
 * For each of the num_states / 2 butterflies, correlates the two received
 * soft values seq[0..1] with the corresponding pair of expected outputs
 * out[2 * i .. 2 * i + 1] and stores the result in metrics[i].
 * The products are summed in 'int' and narrowed to int16_t on store.
 */
static void gen_branch_metrics_n2(int num_states, const int8_t *seq,
	const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	const int s0 = seq[0];
	const int s1 = seq[1];
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *expected = &out[2 * k];

		metrics[k] = (int16_t) (s0 * expected[0] + s1 * expected[1]);
	}
}
77
/* Branch metrics unit, N=3
 *
 * For each of the num_states / 2 butterflies, correlates the three received
 * soft values seq[0..2] with the expected outputs out[4 * i .. 4 * i + 2]
 * and stores the result in metrics[i].
 *
 * Note the stride of 4 in 'out': the fourth entry of every group is never
 * read here (presumably padding shared with the N=4 layout - verify against
 * the code that builds 'out').
 */
static void gen_branch_metrics_n3(int num_states, const int8_t *seq,
	const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	const int s0 = seq[0];
	const int s1 = seq[1];
	const int s2 = seq[2];
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *expected = &out[4 * k];

		metrics[k] = (int16_t) (s0 * expected[0] +
					s1 * expected[1] +
					s2 * expected[2]);
	}
}
90
/* Branch metrics unit, N=4
 *
 * For each of the num_states / 2 butterflies, correlates the four received
 * soft values seq[0..3] with the expected outputs out[4 * i .. 4 * i + 3]
 * and stores the result in metrics[i].
 * The products are summed in 'int' and narrowed to int16_t on store.
 */
static void gen_branch_metrics_n4(int num_states, const int8_t *seq,
	const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	const int s0 = seq[0];
	const int s1 = seq[1];
	const int s2 = seq[2];
	const int s3 = seq[3];
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *expected = &out[4 * k];

		metrics[k] = (int16_t) (s0 * expected[0] +
					s1 * expected[1] +
					s2 * expected[2] +
					s3 * expected[3]);
	}
}
104
/* Path metric unit
 *
 * Runs one ACS butterfly per branch metric (num_states / 2 in total),
 * producing num_states updated path metrics and num_states decisions in
 * 'paths'. The updated metrics are built in a scratch array so that every
 * butterfly reads the previous step's 'sums', then copied back over 'sums'.
 *
 * When 'norm' is non-zero the smallest updated metric is subtracted from
 * all of them before the copy, so the minimum stored metric becomes 0.
 */
static void gen_path_metrics(int num_states, int16_t *sums,
	int16_t *metrics, int16_t *paths, int norm)
{
	int16_t next[num_states];
	int s;

	for (s = 0; s < num_states / 2; s++)
		acs_butterfly(s, num_states, metrics[s],
			      sums, next + s, paths + s);

	if (norm) {
		int16_t lowest = next[0];

		for (s = 1; s < num_states; s++) {
			if (next[s] < lowest)
				lowest = next[s];
		}

		for (s = 0; s < num_states; s++)
			next[s] -= lowest;
	}

	/* sizeof on the VLA yields num_states * sizeof(int16_t) */
	memcpy(sums, next, sizeof(next));
}
130
/* Not-aligned Memory Allocator
 *
 * Allocates an uninitialized array of 'n' int16_t elements with plain
 * malloc(), i.e. without requesting any particular alignment.
 *
 * Returns the new buffer, or NULL if the allocation fails or if the byte
 * size n * sizeof(int16_t) would not fit in size_t. The result is released
 * with osmo_conv_gen_vdec_free().
 */
__attribute__ ((visibility("hidden")))
int16_t *osmo_conv_gen_vdec_malloc(size_t n)
{
	/* Reject element counts whose byte size would wrap around; otherwise
	 * malloc() could hand back a buffer much smaller than requested. */
	if (n > SIZE_MAX / sizeof(int16_t))
		return NULL;

	return malloc(n * sizeof(int16_t));
}
137
/* Counterpart of osmo_conv_gen_vdec_malloc(): hands 'ptr' back to free().
 * Passing NULL is harmless, as with free() itself. */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_vdec_free(int16_t *ptr)
{
	free(ptr);
}
143
Tom Tsou35536802016-11-24 19:24:32 +0700144/* 16-state branch-path metrics units (K=5) */
/* One trellis step for the 16-state, N=2 case.
 *
 * Derives 8 branch metrics from seq[0..1] and 'out' via
 * gen_branch_metrics_n2(), then lets gen_path_metrics() update the 16 path
 * metrics in 'sums' and write the 16 decisions to 'paths'. A non-zero
 * 'norm' requests normalisation of the updated metrics.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n2(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}
154
/* One trellis step for the 16-state, N=3 case.
 *
 * Derives 8 branch metrics from seq[0..2] and 'out' via
 * gen_branch_metrics_n3(), then lets gen_path_metrics() update the 16 path
 * metrics in 'sums' and write the 16 decisions to 'paths'. A non-zero
 * 'norm' requests normalisation of the updated metrics.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n3(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}
165
/* One trellis step for the 16-state, N=4 case.
 *
 * Derives 8 branch metrics from seq[0..3] and 'out' via
 * gen_branch_metrics_n4(), then lets gen_path_metrics() update the 16 path
 * metrics in 'sums' and write the 16 decisions to 'paths'. A non-zero
 * 'norm' requests normalisation of the updated metrics.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n4(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}
176
177/* 64-state branch-path metrics units (K=7) */
/* One trellis step for the 64-state, N=2 case.
 *
 * Derives 32 branch metrics from seq[0..1] and 'out' via
 * gen_branch_metrics_n2(), then lets gen_path_metrics() update the 64 path
 * metrics in 'sums' and write the 64 decisions to 'paths'. A non-zero
 * 'norm' requests normalisation of the updated metrics.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n2(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}
188
/* One trellis step for the 64-state, N=3 case.
 *
 * Derives 32 branch metrics from seq[0..2] and 'out' via
 * gen_branch_metrics_n3(), then lets gen_path_metrics() update the 64 path
 * metrics in 'sums' and write the 64 decisions to 'paths'. A non-zero
 * 'norm' requests normalisation of the updated metrics.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n3(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}
199
/* One trellis step for the 64-state, N=4 case.
 *
 * Derives 32 branch metrics from seq[0..3] and 'out' via
 * gen_branch_metrics_n4(), then lets gen_path_metrics() update the 64 path
 * metrics in 'sums' and write the 64 decisions to 'paths'. A non-zero
 * 'norm' requests normalisation of the updated metrics.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n4(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}