blob: 2257e6a9c3968995cdd12938a5efcfe165a83d30 [file] [log] [blame]
Neels Hofmeyr17518fe2017-06-20 04:35:06 +02001/*! \file conv_acc_generic.c
Vadim Yanitskiy46e533c2017-06-19 18:21:02 +07002 * Accelerated Viterbi decoder implementation
Neels Hofmeyr17518fe2017-06-20 04:35:06 +02003 * for generic architectures without SSE support. */
4/*
Tom Tsou35536802016-11-24 19:24:32 +07005 * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
6 *
7 * All Rights Reserved
8 *
Harald Weltee08da972017-11-13 01:00:26 +09009 * SPDX-License-Identifier: GPL-2.0+
10 *
Tom Tsou35536802016-11-24 19:24:32 +070011 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
Tom Tsou35536802016-11-24 19:24:32 +070020 */
21
Tom Tsou34e228a2017-04-29 00:16:43 +070022#include <stdlib.h>
Tom Tsou35536802016-11-24 19:24:32 +070023#include <stdint.h>
24#include <string.h>
25
/*! Add-Compare-Select (ACS-Butterfly)
 *
 * Reads the two accumulated path metrics sum[2 * state] and
 * sum[2 * state + 1], forms the four candidate sums (+/- metric) and keeps
 * the larger candidate of each pair:
 *
 *   new_sum[0]              = max(sum[2s] + metric, sum[2s + 1] - metric)
 *   new_sum[num_states / 2] = max(sum[2s] - metric, sum[2s + 1] + metric)
 *
 * Note that path selections are stored as -1 and 0 rather than 0 and 1:
 * -1 when the candidate derived from sum[2s] wins (ties included), 0 when
 * the candidate derived from sum[2s + 1] wins. This is to match the output
 * format of the SSE packed compare instruction 'pmaxuw'.
 *
 * The selected sum is clamped at INT16_MAX instead of being silently
 * wrapped by the narrowing conversion, so an overflowing (very good) path
 * can never turn into a very bad one.
 *
 * \param[in]  state       butterfly index; selects the input pair in \a sum
 * \param[in]  num_states  total number of states; the second output is
 *                         written num_states / 2 elements after the first
 * \param[in]  metric      branch metric for this butterfly
 * \param[in]  sum         accumulated path metrics of the previous step
 * \param[out] new_sum     destination of the first selected sum
 * \param[out] path        destination of the first path selection
 */
static void acs_butterfly(int state, int num_states,
			  int16_t metric, int16_t *sum,
			  int16_t *new_sum, int16_t *path)
{
	const int half = num_states / 2;
	const int32_t prev0 = sum[2 * state + 0];
	const int32_t prev1 = sum[2 * state + 1];
	int32_t best;

	/* First output: prev0 + metric against prev1 - metric */
	if (prev0 + metric >= prev1 - metric) {
		best = prev0 + metric;
		path[0] = -1;
	} else {
		best = prev1 - metric;
		path[0] = 0;
	}
	/* Only the upper bound needs a clamp: the larger of the two
	 * candidates is never below the mean of prev0 and prev1, which
	 * itself cannot be less than INT16_MIN. */
	new_sum[0] = (int16_t) (best > INT16_MAX ? INT16_MAX : best);

	/* Second output: prev0 - metric against prev1 + metric */
	if (prev0 - metric >= prev1 + metric) {
		best = prev0 - metric;
		path[half] = -1;
	} else {
		best = prev1 + metric;
		path[half] = 0;
	}
	new_sum[half] = (int16_t) (best > INT16_MAX ? INT16_MAX : best);
}
63
/*! Branch metrics unit N=2
 *
 * For each of the num_states / 2 butterflies, correlate the two input
 * values seq[0..1] with the matching pair of entries in \a out (row
 * stride of 2) and store the result in \a metrics.
 *
 * \param[in]  num_states  number of states; num_states / 2 metrics are written
 * \param[in]  seq         two input values
 * \param[in]  out         table of 2 entries per butterfly
 * \param[out] metrics     resulting branch metrics, one per butterfly
 */
static void gen_branch_metrics_n2(int num_states, const int8_t *seq,
				  const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *row = out + 2 * k;
		int acc = seq[0] * row[0];

		acc += seq[1] * row[1];
		metrics[k] = acc;
	}
}
75
/*! Branch metrics unit N=3
 *
 * For each of the num_states / 2 butterflies, correlate the three input
 * values seq[0..2] with the first three entries of the matching row in
 * \a out and store the result in \a metrics. Rows in \a out are 4 entries
 * apart; the fourth entry of each row is not read here.
 *
 * \param[in]  num_states  number of states; num_states / 2 metrics are written
 * \param[in]  seq         three input values
 * \param[in]  out         table with a row stride of 4 entries
 * \param[out] metrics     resulting branch metrics, one per butterfly
 */
static void gen_branch_metrics_n3(int num_states, const int8_t *seq,
				  const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *row = out + 4 * k;
		int acc = seq[0] * row[0];

		acc += seq[1] * row[1];
		acc += seq[2] * row[2];
		metrics[k] = acc;
	}
}
88
/*! Branch metrics unit N=4
 *
 * For each of the num_states / 2 butterflies, correlate the four input
 * values seq[0..3] with the matching row of four entries in \a out and
 * store the result in \a metrics.
 *
 * \param[in]  num_states  number of states; num_states / 2 metrics are written
 * \param[in]  seq         four input values
 * \param[in]  out         table of 4 entries per butterfly
 * \param[out] metrics     resulting branch metrics, one per butterfly
 */
static void gen_branch_metrics_n4(int num_states, const int8_t *seq,
				  const int16_t *out, int16_t *metrics)
{
	const int count = num_states / 2;
	int k;

	for (k = 0; k < count; k++) {
		const int16_t *row = out + 4 * k;
		int acc = seq[0] * row[0];

		acc += seq[1] * row[1];
		acc += seq[2] * row[2];
		acc += seq[3] * row[3];
		metrics[k] = acc;
	}
}
102
/*! Path metric unit
 *
 * Runs one ACS butterfly per pair of states, collecting the updated sums
 * in a temporary array, optionally normalizes them, and then copies the
 * result back over \a sums.
 *
 * \param[in]     num_states  number of states
 * \param[in,out] sums        accumulated path metrics (num_states entries),
 *                            replaced by the updated values
 * \param[in]     metrics     branch metrics (num_states / 2 entries)
 * \param[out]    paths       path selections written by the butterflies
 * \param[in]     norm        when non-zero, subtract the smallest updated
 *                            sum from every updated sum
 */
static void gen_path_metrics(int num_states, int16_t *sums,
			     int16_t *metrics, int16_t *paths, int norm)
{
	int16_t next[num_states];
	const int half = num_states / 2;
	int s;

	/* The butterflies read from sums and write into the scratch array,
	 * so the inputs are not overwritten while still in use. */
	for (s = 0; s < half; s++)
		acs_butterfly(s, num_states, metrics[s], sums, next + s, paths + s);

	if (norm != 0) {
		int16_t lowest = next[0];

		for (s = 1; s < num_states; s++)
			lowest = (next[s] < lowest) ? next[s] : lowest;

		/* Shift all sums so that the smallest one becomes zero */
		for (s = 0; s < num_states; s++)
			next[s] -= lowest;
	}

	memcpy(sums, next, num_states * sizeof(int16_t));
}
128
/*! Not-aligned Memory Allocator
 *
 * Allocates room for \a n int16_t elements with plain malloc().
 *
 * \param[in] n  number of int16_t elements to allocate
 * \returns pointer to the new buffer, to be released with
 *          osmo_conv_gen_vdec_free(); NULL if the allocation fails or if
 *          the byte size of \a n elements does not fit into size_t
 */
__attribute__ ((visibility("hidden")))
int16_t *osmo_conv_gen_vdec_malloc(size_t n)
{
	/* Reject counts whose byte size would wrap around, which would
	 * otherwise yield a buffer far smaller than the caller expects. */
	if (n > SIZE_MAX / sizeof(int16_t))
		return NULL;

	return malloc(n * sizeof(int16_t));
}
135
/*! Release a buffer by handing it to free().
 *
 * \param[in] ptr  buffer to release; passed to free() unchanged
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_vdec_free(int16_t *ptr)
{
	free(ptr);
}
141
/* 16-state branch-path metrics units (K=5) */

/*! 16-state (K=5), N=2: compute the 8 branch metrics for the input values
 * in \a seq, then update the 16 path metrics in \a sums.
 *
 * \param[in]     seq    input values (seq[0..1] are read)
 * \param[in]     out    table used for the branch metrics
 * \param[in,out] sums   accumulated path metrics, updated in place
 * \param[out]    paths  path selections of this step
 * \param[in]     norm   non-zero to normalize the updated path metrics
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n2(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}
152
/*! 16-state (K=5), N=3: compute the 8 branch metrics for the input values
 * in \a seq, then update the 16 path metrics in \a sums.
 *
 * \param[in]     seq    input values (seq[0..2] are read)
 * \param[in]     out    table used for the branch metrics
 * \param[in,out] sums   accumulated path metrics, updated in place
 * \param[out]    paths  path selections of this step
 * \param[in]     norm   non-zero to normalize the updated path metrics
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n3(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n3(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}
163
/*! 16-state (K=5), N=4: compute the 8 branch metrics for the input values
 * in \a seq, then update the 16 path metrics in \a sums.
 *
 * \param[in]     seq    input values (seq[0..3] are read)
 * \param[in]     out    table used for the branch metrics
 * \param[in,out] sums   accumulated path metrics, updated in place
 * \param[out]    paths  path selections of this step
 * \param[in]     norm   non-zero to normalize the updated path metrics
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k5_n4(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 16 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n4(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}
174
/* 64-state branch-path metrics units (K=7) */

/*! 64-state (K=7), N=2: compute the 32 branch metrics for the input values
 * in \a seq, then update the 64 path metrics in \a sums.
 *
 * \param[in]     seq    input values (seq[0..1] are read)
 * \param[in]     out    table used for the branch metrics
 * \param[in,out] sums   accumulated path metrics, updated in place
 * \param[out]    paths  path selections of this step
 * \param[in]     norm   non-zero to normalize the updated path metrics
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n2(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n2(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}
186
/*! 64-state (K=7), N=3: compute the 32 branch metrics for the input values
 * in \a seq, then update the 64 path metrics in \a sums.
 *
 * \param[in]     seq    input values (seq[0..2] are read)
 * \param[in]     out    table used for the branch metrics
 * \param[in,out] sums   accumulated path metrics, updated in place
 * \param[out]    paths  path selections of this step
 * \param[in]     norm   non-zero to normalize the updated path metrics
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n3(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}
197
/*! 64-state (K=7), N=4: compute the 32 branch metrics for the input values
 * in \a seq, then update the 64 path metrics in \a sums.
 *
 * \param[in]     seq    input values (seq[0..3] are read)
 * \param[in]     out    table used for the branch metrics
 * \param[in,out] sums   accumulated path metrics, updated in place
 * \param[out]    paths  path selections of this step
 * \param[in]     norm   non-zero to normalize the updated path metrics
 */
__attribute__ ((visibility("hidden")))
void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
	int16_t *sums, int16_t *paths, int norm)
{
	enum { NUM_STATES = 64 };
	int16_t branch[NUM_STATES / 2];

	gen_branch_metrics_n4(NUM_STATES, seq, out, branch);
	gen_path_metrics(NUM_STATES, sums, branch, paths, norm);
}