blob: d98caff66ee9ef938e7511ff1ee8538baa78ad9f [file] [log] [blame]
Pau Espin Pedrol4456b6f2019-05-24 16:54:19 +02001/*
2 * Copyright (C) 2019 sysmocom - s.f.m.c. GmbH
3 * All Rights Reserved
4 *
5 * Author: Pau Espin Pedrol <pespin@sysmocom.de>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 *
20 */
21
/*
 * rate_ctr API uses several osmocom select loop features, and as a result,
 * calls to it must be done through the main thread (the one running the osmocom
 * loop in osmo-trx).
 * Since read/write from/to SDR is done in separate threads (even read and write
 * each use a different thread), we must use some sort of message passing system
 * between the main thread feeding rate_ctr structures and the Rx/Tx threads
 * generating the events.
 * The idea is that upon read/write issues, lower layers (SDR APIs) provide us with
 * underrun/overrun/droppedPackets information, and in that case we pass that up
 * the stack through signal <SS_DEVICE,S_DEVICE_COUNTER_CHANGE> with signal_data
 * being a pointer to a "struct device_counters" structure, which contains
 * device (implementation agnostic) stateful counters for different kinds of
 * statistics.
 * That signal is processed here in device_sig_cb, where a copy of the "struct
 * device_counters" structure is held and the main thread is instructed through
 * a timerfd to update rate_ctr APIs against this copy. All this is done inside
 * a mutex to avoid different race conditions (between Rx and Tx threads, and
 * between Rx/Tx and main thread). For the same reason, callers of signal
 * <SS_DEVICE,S_DEVICE_COUNTER_CHANGE> (device_sig_cb), that is Rx/Tx threads,
 * must do so with PTHREAD_CANCEL_DISABLE, in order to avoid possible deadlocks
 * in case the main thread decides to cancel other threads due to a shutdown
 * operation (e.g. SIGKILL received)
 */
46
47#include <string.h>
48#include <stdint.h>
49#include <inttypes.h>
50#include <netinet/in.h>
51#include <arpa/inet.h>
52
53extern "C" {
54#include <osmocom/core/talloc.h>
55#include <osmocom/core/utils.h>
56#include <osmocom/core/rate_ctr.h>
57#include <osmocom/core/select.h>
58#include <osmocom/core/stats.h>
Pau Espin Pedrol6a305fe2019-05-24 19:58:20 +020059#include <osmocom/core/timer.h>
Pau Espin Pedrol4456b6f2019-05-24 16:54:19 +020060
61#include "osmo_signal.h"
62#include "trx_vty.h"
63#include "trx_rate_ctr.h"
64}
65#include "Threads.h"
66#include "Logger.h"
67
68/* Used in ctrs_pending, when set it means that channel slot contains unused
69 (non-pending) counter data */
70#define PENDING_CHAN_NONE SIZE_MAX
71
Pau Espin Pedrol6a305fe2019-05-24 19:58:20 +020072static void *trx_rate_ctr_ctx;
73
Pau Espin Pedrol4456b6f2019-05-24 16:54:19 +020074static struct rate_ctr_group** rate_ctrs;
75static struct device_counters* ctrs_pending;
76static size_t chan_len;
77static struct osmo_fd rate_ctr_timerfd;
78static Mutex rate_ctr_mutex;
79
Pau Espin Pedrol6a305fe2019-05-24 19:58:20 +020080struct osmo_timer_list threshold_timer;
81static LLIST_HEAD(threshold_list);
82static int threshold_timer_sched_secs;
83static bool threshold_initied;
84
85const struct value_string rate_ctr_intv[] = {
86 { RATE_CTR_INTV_SEC, "per-second" },
87 { RATE_CTR_INTV_MIN, "per-minute" },
88 { RATE_CTR_INTV_HOUR, "per-hour" },
89 { RATE_CTR_INTV_DAY, "per-day" },
90 { 0, NULL }
91};
92
93const struct value_string trx_chan_ctr_names[] = {
94 { TRX_CTR_RX_UNDERRUNS, "rx_underruns" },
95 { TRX_CTR_RX_OVERRUNS, "rx_overruns" },
96 { TRX_CTR_TX_UNDERRUNS, "tx_underruns" },
97 { TRX_CTR_RX_DROP_EV, "rx_drop_events" },
98 { TRX_CTR_RX_DROP_SMPL, "rx_drop_samples" },
99 { 0, NULL }
Pau Espin Pedrol4456b6f2019-05-24 16:54:19 +0200100};
101
102static const struct rate_ctr_desc trx_chan_ctr_desc[] = {
103 [TRX_CTR_RX_UNDERRUNS] = { "device:rx_underruns", "Number of Rx underruns" },
104 [TRX_CTR_RX_OVERRUNS] = { "device:rx_overruns", "Number of Rx overruns" },
105 [TRX_CTR_TX_UNDERRUNS] = { "device:tx_underruns", "Number of Tx underruns" },
106 [TRX_CTR_RX_DROP_EV] = { "device:rx_drop_events", "Number of times Rx samples were dropped by HW" },
107 [TRX_CTR_RX_DROP_SMPL] = { "device:rx_drop_samples", "Number of Rx samples dropped by HW" },
108};
109
110static const struct rate_ctr_group_desc trx_chan_ctr_group_desc = {
111 .group_name_prefix = "trx:chan",
112 .group_description = "osmo-trx statistics",
113 .class_id = OSMO_STATS_CLASS_GLOBAL,
114 .num_ctr = ARRAY_SIZE(trx_chan_ctr_desc),
115 .ctr_desc = trx_chan_ctr_desc,
116};
117
118static int rate_ctr_timerfd_cb(struct osmo_fd *ofd, unsigned int what) {
119 size_t chan;
120 struct rate_ctr *ctr;
121 LOGC(DMAIN, NOTICE) << "Main thread is updating counters";
122 rate_ctr_mutex.lock();
123 for (chan = 0; chan < chan_len; chan++) {
124 if (ctrs_pending[chan].chan == PENDING_CHAN_NONE)
125 continue;
126 LOGCHAN(chan, DMAIN, INFO) << "rate_ctr update";
127 ctr = &rate_ctrs[chan]->ctr[TRX_CTR_RX_UNDERRUNS];
128 rate_ctr_add(ctr, ctrs_pending[chan].rx_underruns - ctr->current);
129 ctr = &rate_ctrs[chan]->ctr[TRX_CTR_RX_OVERRUNS];
130 rate_ctr_add(ctr, ctrs_pending[chan].rx_overruns - ctr->current);
131 ctr = &rate_ctrs[chan]->ctr[TRX_CTR_TX_UNDERRUNS];
132 rate_ctr_add(ctr, ctrs_pending[chan].tx_underruns - ctr->current);
133 ctr = &rate_ctrs[chan]->ctr[TRX_CTR_RX_DROP_EV];
134 rate_ctr_add(ctr, ctrs_pending[chan].rx_dropped_events - ctr->current);
135 ctr = &rate_ctrs[chan]->ctr[TRX_CTR_RX_DROP_SMPL];
136 rate_ctr_add(ctr, ctrs_pending[chan].rx_dropped_samples - ctr->current);
137
138 /* Mark as done */
139 ctrs_pending[chan].chan = PENDING_CHAN_NONE;
140 }
141 if (osmo_timerfd_disable(&rate_ctr_timerfd) < 0)
142 LOGC(DMAIN, ERROR) << "Failed to disable timerfd";
143 rate_ctr_mutex.unlock();
144 return 0;
145}
146
147/* Callback function to be called every time we receive a signal from DEVICE */
148static int device_sig_cb(unsigned int subsys, unsigned int signal,
149 void *handler_data, void *signal_data)
150{
151 struct device_counters *ctr;
152 /* Delay sched around 20 ms, in case we receive several calls from several
153 * channels batched */
154 struct timespec next_sched = {.tv_sec = 0, .tv_nsec = 20*1000*1000};
155 /* no automatic re-trigger */
156 struct timespec intv_sched = {.tv_sec = 0, .tv_nsec = 0};
157
158 switch (signal) {
159 case S_DEVICE_COUNTER_CHANGE:
160 ctr = (struct device_counters *)signal_data;
161 LOGCHAN(ctr->chan, DMAIN, NOTICE) << "Received counter change from radioDevice";
162 rate_ctr_mutex.lock();
163 ctrs_pending[ctr->chan] = *ctr;
164 if (osmo_timerfd_schedule(&rate_ctr_timerfd, &next_sched, &intv_sched) < 0) {
165 LOGC(DMAIN, ERROR) << "Failed to schedule timerfd: " << errno << " = "<< strerror(errno);
166 }
167 rate_ctr_mutex.unlock();
168 break;
169 default:
170 break;
171 }
172 return 0;
173}
174
Pau Espin Pedrol6a305fe2019-05-24 19:58:20 +0200175/************************************
176 * ctr_threshold APIs
177 ************************************/
178static const char* ctr_threshold_2_vty_str(struct ctr_threshold *ctr)
179{
180 static char buf[256];
181 int rc = 0;
182 rc += snprintf(buf, sizeof(buf), "ctr-error-threshold %s", get_value_string(trx_chan_ctr_names, ctr->ctr_id));
183 rc += snprintf(buf + rc, sizeof(buf) - rc, " %d %s", ctr->val, get_value_string(rate_ctr_intv, ctr->intv));
184 return buf;
185}
186
187static void threshold_timer_cb(void *data)
188{
189 struct ctr_threshold *ctr_thr;
190 struct rate_ctr *rate_ctr;
191 size_t chan;
192 LOGC(DMAIN, DEBUG) << "threshold_timer_cb fired!";
193
194 llist_for_each_entry(ctr_thr, &threshold_list, list) {
195 for (chan = 0; chan < chan_len; chan++) {
196 rate_ctr = &rate_ctrs[chan]->ctr[ctr_thr->ctr_id];
197 LOGCHAN(chan, DMAIN, INFO) << "checking threshold: " << ctr_threshold_2_vty_str(ctr_thr)
198 << " ("<< rate_ctr->intv[ctr_thr->intv].rate << " vs " << ctr_thr->val << ")";
199 if (rate_ctr->intv[ctr_thr->intv].rate >= ctr_thr->val) {
200 LOGCHAN(chan, DMAIN, FATAL) << "threshold reached, stopping! " << ctr_threshold_2_vty_str(ctr_thr)
201 << " ("<< rate_ctr->intv[ctr_thr->intv].rate << " vs " << ctr_thr->val << ")";
202 osmo_signal_dispatch(SS_MAIN, S_MAIN_STOP_REQUIRED, NULL);
203 return;
204 }
205 }
206 }
207 osmo_timer_schedule(&threshold_timer, threshold_timer_sched_secs, 0);
208}
209
210static size_t ctr_threshold_2_seconds(struct ctr_threshold *ctr)
211{
212 size_t mult = 0;
213 switch (ctr->intv) {
214 case RATE_CTR_INTV_SEC:
215 mult = 1;
216 break;
217 case RATE_CTR_INTV_MIN:
218 mult = 60;
219 break;
220 case RATE_CTR_INTV_HOUR:
221 mult = 60*60;
222 break;
223 case RATE_CTR_INTV_DAY:
224 mult = 60*60*24;
225 break;
226 default:
227 OSMO_ASSERT(false);
228 }
229 return mult;
230}
231
232static void threshold_timer_update_intv() {
233 struct ctr_threshold *ctr, *min_ctr;
234 size_t secs, min_secs;
235
236 /* Avoid scheduling timer until itself and other structures are prepared
237 by trx_rate_ctr_init */
238 if (!threshold_initied)
239 return;
240
241 if (llist_empty(&threshold_list)) {
242 if (osmo_timer_pending(&threshold_timer))
243 osmo_timer_del(&threshold_timer);
244 return;
245 }
246
247 min_ctr = llist_first_entry(&threshold_list, struct ctr_threshold, list);
248 min_secs = ctr_threshold_2_seconds(min_ctr);
249
250 llist_for_each_entry(ctr, &threshold_list, list) {
251 secs = ctr_threshold_2_seconds(ctr);
252 if( min_secs > secs)
253 min_secs = secs;
254 }
255
256
257 threshold_timer_sched_secs = OSMO_MAX(min_secs / 2 - 1, 1);
258 LOGC(DMAIN, INFO) << "New ctr-error-threshold check interval: "
259 << threshold_timer_sched_secs << " seconds";
260 osmo_timer_schedule(&threshold_timer, threshold_timer_sched_secs, 0);
261}
262
263/* Init rate_ctr subsystem. Expected to be called during process start by main thread before VTY is ready */
Pau Espin Pedrol4456b6f2019-05-24 16:54:19 +0200264void trx_rate_ctr_init(void *ctx, struct trx_ctx* trx_ctx)
265{
266 size_t i;
Pau Espin Pedrol6a305fe2019-05-24 19:58:20 +0200267 trx_rate_ctr_ctx = ctx;
Pau Espin Pedrol4456b6f2019-05-24 16:54:19 +0200268 chan_len = trx_ctx->cfg.num_chans;
269 ctrs_pending = (struct device_counters*) talloc_zero_size(ctx, chan_len * sizeof(struct device_counters));
270 rate_ctrs = (struct rate_ctr_group**) talloc_zero_size(ctx, chan_len * sizeof(struct rate_ctr_group*));
271
272 for (i = 0; i < chan_len; i++) {
273 ctrs_pending[i].chan = PENDING_CHAN_NONE;
274 rate_ctrs[i] = rate_ctr_group_alloc(ctx, &trx_chan_ctr_group_desc, i);
275 if (!rate_ctrs[i]) {
276 LOGCHAN(i, DMAIN, ERROR) << "Failed to allocate rate ctr";
277 exit(1);
278 }
279 }
280 rate_ctr_timerfd.fd = -1;
281 if (osmo_timerfd_setup(&rate_ctr_timerfd, rate_ctr_timerfd_cb, NULL) < 0) {
282 LOGC(DMAIN, ERROR) << "Failed to setup timerfd";
283 exit(1);
284 }
285 osmo_signal_register_handler(SS_DEVICE, device_sig_cb, NULL);
Pau Espin Pedrol6a305fe2019-05-24 19:58:20 +0200286
287 /* Now set up threshold checks */
288 threshold_initied = true;
289 osmo_timer_setup(&threshold_timer, threshold_timer_cb, NULL);
290 threshold_timer_update_intv();
291}
292
293void trx_rate_ctr_threshold_add(struct ctr_threshold *ctr)
294{
295 struct ctr_threshold *new_ctr;
296
297 new_ctr = talloc_zero(trx_rate_ctr_ctx, struct ctr_threshold);
298 *new_ctr = *ctr;
299 LOGC(DMAIN, NOTICE) << "Adding new threshold check: " << ctr_threshold_2_vty_str(new_ctr);
300 llist_add(&new_ctr->list, &threshold_list);
301 threshold_timer_update_intv();
302}
303
304int trx_rate_ctr_threshold_del(struct ctr_threshold *del_ctr)
305{
306 struct ctr_threshold *ctr;
307
308 llist_for_each_entry(ctr, &threshold_list, list) {
309 if (ctr->intv != del_ctr->intv ||
310 ctr->ctr_id != del_ctr->ctr_id ||
311 ctr->val != del_ctr->val)
312 continue;
313
314 LOGC(DMAIN, NOTICE) << "Deleting threshold check: " << ctr_threshold_2_vty_str(del_ctr);
315 llist_del(&ctr->list);
316 talloc_free(ctr);
317 threshold_timer_update_intv();
318 return 0;
319 }
320 return -1;
321}
322
323void trx_rate_ctr_threshold_write_config(struct vty *vty, char *indent_prefix)
324{
325 struct ctr_threshold *ctr;
326
327 llist_for_each_entry(ctr, &threshold_list, list) {
328 vty_out(vty, "%s%s%s", indent_prefix, ctr_threshold_2_vty_str(ctr), VTY_NEWLINE);
329 }
Pau Espin Pedrol4456b6f2019-05-24 16:54:19 +0200330}