blob: 87b0e9538e74bc3103cc73369a55126d204ce20b [file] [log] [blame]
Piotr Krysik9e2e8352018-02-27 12:16:25 +01001/*! \file conv_acc_generic.c
2 * Accelerated Viterbi decoder implementation
3 * for generic architectures without SSE support. */
4/*
5 * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
6 *
7 * All Rights Reserved
8 *
9 * SPDX-License-Identifier: GPL-2.0+
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
22 * with this program; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 */
25
26#include <stdlib.h>
27#include <stdint.h>
28#include <string.h>
29
30#define __attribute__(_arg_)
31
/* Add-Compare-Select (ACS-Butterfly)
 *
 * Reads the two accumulated sums sum[2 * state] and sum[2 * state + 1],
 * forms the four candidate sums obtained by adding and subtracting the
 * branch metric, and keeps the larger candidate of each pair:
 *
 *   new_sum[0]              / path[0]              <- first pair
 *   new_sum[num_states / 2] / path[num_states / 2] <- second pair
 *
 * A path selection is stored as -1 when the candidate built from the
 * even-indexed input sum is chosen (ties included) and as 0 otherwise,
 * rather than as 1 and 0. This mirrors the result format of the SSE
 * packed compare used by the accelerated implementation.
 */
static void acs_butterfly(int state, int num_states,
	int16_t metric, int16_t *sum,
	int16_t *new_sum, int16_t *path)
{
	const int half = num_states / 2;

	/* Both inputs are widened to int so the candidates are computed
	 * without intermediate 16-bit truncation. */
	const int even_in = sum[2 * state];
	const int odd_in = sum[2 * state + 1];

	const int first_even = even_in + metric;
	const int first_odd = odd_in - metric;
	const int second_even = even_in - metric;
	const int second_odd = odd_in + metric;

	const int first_takes_even = (first_even >= first_odd);
	const int second_takes_even = (second_even >= second_odd);

	new_sum[0] = (int16_t) (first_takes_even ? first_even : first_odd);
	path[0] = (int16_t) (first_takes_even ? -1 : 0);

	new_sum[half] = (int16_t) (second_takes_even ? second_even : second_odd);
	path[half] = (int16_t) (second_takes_even ? -1 : 0);
}
69
/* Branch metrics unit N=2
 *
 * Fills metrics[0 .. num_states / 2 - 1]. Entry k is the dot product of
 * the two input symbols seq[0..1] with the pair out[2 * k], out[2 * k + 1].
 */
static void gen_branch_metrics_n2(int num_states, const int8_t *seq,
	const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *pair = &out[2 * k];

		metrics[k] = seq[0] * pair[0] + seq[1] * pair[1];
	}
}
81
/* Branch metrics unit N=3
 *
 * Fills metrics[0 .. num_states / 2 - 1]. Entry k is the dot product of
 * the three input symbols seq[0..2] with out[4 * k .. 4 * k + 2]; the
 * expected outputs are laid out with a stride of four, and the fourth
 * slot of each group is not read.
 */
static void gen_branch_metrics_n3(int num_states, const int8_t *seq,
	const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *group = &out[4 * k];

		metrics[k] = seq[0] * group[0] +
			     seq[1] * group[1] +
			     seq[2] * group[2];
	}
}
94
/* Branch metrics unit N=4
 *
 * Fills metrics[0 .. num_states / 2 - 1]. Entry k is the dot product of
 * the four input symbols seq[0..3] with out[4 * k .. 4 * k + 3].
 */
static void gen_branch_metrics_n4(int num_states, const int8_t *seq,
	const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *group = &out[4 * k];

		metrics[k] = seq[0] * group[0] +
			     seq[1] * group[1] +
			     seq[2] * group[2] +
			     seq[3] * group[3];
	}
}
108
/* Path metric unit
 *
 * Performs one add-compare-select step over the whole state table:
 * for every i in [0, num_states / 2) a butterfly consumes sums[2 * i],
 * sums[2 * i + 1] and metrics[i], and produces two new sums plus two
 * path decisions (at offsets i and i + num_states / 2 of the new sums
 * and of paths). The new sums are built in a scratch buffer because the
 * butterflies read and write different indices of the table, and are
 * copied back over sums at the end.
 *
 * When norm is non-zero the smallest new sum is subtracted from every
 * entry before the copy, so the minimum becomes 0.
 *
 * The scratch buffer lives on the stack for tables of up to 64 states,
 * which covers the 16- and 64-state callers in this file and keeps
 * malloc()/free() out of the per-symbol path. Larger tables fall back
 * to the heap; if that allocation fails the function returns with sums
 * and paths left untouched. Tables with fewer than two states contain
 * no butterfly and are likewise left untouched. num_states is expected
 * to be even, since states are processed in pairs.
 */
static void gen_path_metrics(int num_states, int16_t *sums,
	int16_t *metrics, int16_t *paths, int norm)
{
	enum { STACK_STATES = 64 };

	int16_t stack_sums[STACK_STATES];
	int16_t *heap_sums = NULL;
	int16_t *new_sums = stack_sums;
	int16_t min;
	int i;

	if (num_states < 2)
		return;

	if (num_states > STACK_STATES) {
		heap_sums = malloc(sizeof(*heap_sums) * (size_t) num_states);
		if (heap_sums == NULL)
			return;
		new_sums = heap_sums;
	}

	for (i = 0; i < num_states / 2; i++)
		acs_butterfly(i, num_states, metrics[i],
			      sums, &new_sums[i], &paths[i]);

	if (norm) {
		min = new_sums[0];

		for (i = 1; i < num_states; i++)
			if (new_sums[i] < min)
				min = new_sums[i];

		for (i = 0; i < num_states; i++)
			new_sums[i] -= min;
	}

	memcpy(sums, new_sums, sizeof(*new_sums) * (size_t) num_states);

	/* free(NULL) is a no-op, so the stack-buffer case needs no branch */
	free(heap_sums);
}
136
/* Not-aligned Memory Allocator
 *
 * Allocates room for n int16_t elements with plain malloc(), i.e. with
 * no alignment guarantee beyond what malloc() itself provides.
 *
 * Returns the new buffer, or NULL when the allocation fails or when the
 * byte size of n elements cannot be represented in a size_t. Without
 * that guard the multiplication would wrap and a buffer much smaller
 * than requested would be returned.
 */
__attribute__ ((visibility("hidden")))
int16_t *osmo_conv_gen_vdec_malloc(size_t n)
{
	int16_t *buf;

	if (n > SIZE_MAX / sizeof(*buf))
		return NULL;

	buf = malloc(sizeof(*buf) * n);

	return buf;
}
143
/* Release a buffer by passing it straight to free(); this is the
 * counterpart of osmo_conv_gen_vdec_malloc(). A NULL pointer is
 * accepted, as free() treats it as a no-op.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_vdec_free(int16_t *ptr)
{
	free(ptr);
}
149
/* 16-state branch-path metrics units (K=5) */

/* N=2 variant: computes the 8 branch metrics for the input symbols in
 * seq against the expected outputs in out, then runs the path metric
 * unit over the 16-entry sums table, recording decisions in paths.
 * norm is passed through to the path metric unit unchanged.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n2(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}
160
/* 16-state (K=5), N=3 variant: computes the 8 branch metrics for the
 * input symbols in seq against the expected outputs in out, then runs
 * the path metric unit over the 16-entry sums table, recording
 * decisions in paths. norm is passed through unchanged.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n3(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}
171
/* 16-state (K=5), N=4 variant: computes the 8 branch metrics for the
 * input symbols in seq against the expected outputs in out, then runs
 * the path metric unit over the 16-entry sums table, recording
 * decisions in paths. norm is passed through unchanged.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n4(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}
182
/* 64-state branch-path metrics units (K=7) */

/* N=2 variant: computes the 32 branch metrics for the input symbols in
 * seq against the expected outputs in out, then runs the path metric
 * unit over the 64-entry sums table, recording decisions in paths.
 * norm is passed through to the path metric unit unchanged.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n2(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}
194
/* 64-state (K=7), N=3 variant: computes the 32 branch metrics for the
 * input symbols in seq against the expected outputs in out, then runs
 * the path metric unit over the 64-entry sums table, recording
 * decisions in paths. norm is passed through unchanged.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n3(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}
205
/* 64-state (K=7), N=4 variant: computes the 32 branch metrics for the
 * input symbols in seq against the expected outputs in out, then runs
 * the path metric unit over the 64-entry sums table, recording
 * decisions in paths. norm is passed through unchanged.
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch_metrics[NUM_STATES / 2];

	gen_branch_metrics_n4(NUM_STATES, seq, out, branch_metrics);
	gen_path_metrics(NUM_STATES, sums, branch_metrics, paths, norm);
}