Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2019 sysmocom - s.f.m.c. GmbH |
| 3 | * All Rights Reserved |
| 4 | * |
Pau Espin Pedrol | e9ce77b | 2019-06-25 12:29:01 +0200 | [diff] [blame] | 5 | * SPDX-License-Identifier: AGPL-3.0+ |
| 6 | * |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 7 | * Author: Pau Espin Pedrol <pespin@sysmocom.de> |
| 8 | * |
Pau Espin Pedrol | e9ce77b | 2019-06-25 12:29:01 +0200 | [diff] [blame] | 9 | * This program is free software: you can redistribute it and/or modify |
| 10 | * it under the terms of the GNU Affero General Public License as published by |
| 11 | * the Free Software Foundation, either version 3 of the License, or |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 12 | * (at your option) any later version. |
| 13 | * |
| 14 | * This program is distributed in the hope that it will be useful, |
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
Pau Espin Pedrol | e9ce77b | 2019-06-25 12:29:01 +0200 | [diff] [blame] | 17 | * GNU Affero General Public License for more details. |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 18 | * |
Pau Espin Pedrol | e9ce77b | 2019-06-25 12:29:01 +0200 | [diff] [blame] | 19 | * You should have received a copy of the GNU Affero General Public License |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 20 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
Pau Espin Pedrol | e9ce77b | 2019-06-25 12:29:01 +0200 | [diff] [blame] | 21 | * See the COPYING file in the main directory for details. |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 22 | */ |
| 23 | |
| 24 | /* |
| 25 | * rate_ctr API uses several osmocom select loop features, and as a result, |
| 26 | * calls to it must be done through the main thread (the one running the osmocom |
| 27 | * loop in osmo-trx). |
| 28 | * Since read/write from/to SDR is done in separate threads (even read and write |
| 29 | * each use a different thread), we must use some sort of message passing system |
| 30 | * between main thread feeding rate_ctr structures and the Rx/Tx threads |
| 31 | * generating the events. |
| 32 | * The idea is that upon read/write issues, lower layers (SDR APIs) provide us with |
| 33 | * underrun/overrun/droppedPackets information, and in that case we pass that up |
| 34 | * the stack through signal <SS_DEVICE,S_DEVICE_COUNTER_CHANGE> with signal_cb |
| 35 | * being a pointer to a "struct device_counters" structure, which contains |
| 36 | * device (implementation agnostic) stateful counters for different kinds of |
| 37 | * statistics. |
| 38 | * That signal is processed here in device_sig_cb, where a copy of the "struct |
| 39 | * device_counters" structure is held and the main thread is instructed through |
| 40 | * a timerfd to update rate_ctr APIs against this copy. All this is done inside |
Martin Hauke | 066fd04 | 2019-10-13 19:08:00 +0200 | [diff] [blame] | 41 | * a mutex to avoid different race conditions (between Rx and Tx threads, and |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 42 | * between Rx/Tx and main thread). For the same reason, callers of signal |
| 43 | * <SS_DEVICE,S_DEVICE_COUNTER_CHANGE> (device_sig_cb), that is Rx/Tx threads, |
| 44 | * must do so with PTHREAD_CANCEL_DISABLE, in order to avoid possible deadlocks |
| 45 | * in case the main thread decides to cancel other threads due to a shutdown |
| 46 | * operation (e.g. SIGKILL received) |
| 47 | */ |
| 48 | |
| 49 | #include <string.h> |
| 50 | #include <stdint.h> |
| 51 | #include <inttypes.h> |
| 52 | #include <netinet/in.h> |
| 53 | #include <arpa/inet.h> |
| 54 | |
| 55 | extern "C" { |
| 56 | #include <osmocom/core/talloc.h> |
| 57 | #include <osmocom/core/utils.h> |
| 58 | #include <osmocom/core/rate_ctr.h> |
| 59 | #include <osmocom/core/select.h> |
| 60 | #include <osmocom/core/stats.h> |
Pau Espin Pedrol | 6a305fe | 2019-05-24 19:58:20 +0200 | [diff] [blame] | 61 | #include <osmocom/core/timer.h> |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 62 | |
| 63 | #include "osmo_signal.h" |
| 64 | #include "trx_vty.h" |
| 65 | #include "trx_rate_ctr.h" |
| 66 | } |
| 67 | #include "Threads.h" |
| 68 | #include "Logger.h" |
| 69 | |
| 70 | /* Used in ctrs_pending, when set it means that channel slot contains unused |
| 71 | (non-pending) counter data */ |
| 72 | #define PENDING_CHAN_NONE SIZE_MAX |
| 73 | |
Pau Espin Pedrol | 6a305fe | 2019-05-24 19:58:20 +0200 | [diff] [blame] | 74 | static void *trx_rate_ctr_ctx; |
| 75 | |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 76 | static struct rate_ctr_group** rate_ctrs; |
| 77 | static struct device_counters* ctrs_pending; |
| 78 | static size_t chan_len; |
| 79 | static struct osmo_fd rate_ctr_timerfd; |
| 80 | static Mutex rate_ctr_mutex; |
| 81 | |
Pau Espin Pedrol | 6a305fe | 2019-05-24 19:58:20 +0200 | [diff] [blame] | 82 | struct osmo_timer_list threshold_timer; |
| 83 | static LLIST_HEAD(threshold_list); |
| 84 | static int threshold_timer_sched_secs; |
| 85 | static bool threshold_initied; |
| 86 | |
| 87 | const struct value_string rate_ctr_intv[] = { |
| 88 | { RATE_CTR_INTV_SEC, "per-second" }, |
| 89 | { RATE_CTR_INTV_MIN, "per-minute" }, |
| 90 | { RATE_CTR_INTV_HOUR, "per-hour" }, |
| 91 | { RATE_CTR_INTV_DAY, "per-day" }, |
| 92 | { 0, NULL } |
| 93 | }; |
| 94 | |
| 95 | const struct value_string trx_chan_ctr_names[] = { |
Pau Espin Pedrol | 6a305fe | 2019-05-24 19:58:20 +0200 | [diff] [blame] | 96 | { TRX_CTR_RX_OVERRUNS, "rx_overruns" }, |
| 97 | { TRX_CTR_TX_UNDERRUNS, "tx_underruns" }, |
| 98 | { TRX_CTR_RX_DROP_EV, "rx_drop_events" }, |
| 99 | { TRX_CTR_RX_DROP_SMPL, "rx_drop_samples" }, |
Pau Espin Pedrol | 68a7809 | 2019-07-29 20:11:25 +0200 | [diff] [blame] | 100 | { TRX_CTR_TX_DROP_EV, "tx_drop_events" }, |
| 101 | { TRX_CTR_TX_DROP_SMPL, "tx_drop_samples" }, |
Pau Espin Pedrol | 6a305fe | 2019-05-24 19:58:20 +0200 | [diff] [blame] | 102 | { 0, NULL } |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 103 | }; |
| 104 | |
| 105 | static const struct rate_ctr_desc trx_chan_ctr_desc[] = { |
Pau Espin Pedrol | 68a7809 | 2019-07-29 20:11:25 +0200 | [diff] [blame] | 106 | [TRX_CTR_RX_OVERRUNS] = { "device:rx_overruns", "Number of Rx overruns in FIFO queue" }, |
| 107 | [TRX_CTR_TX_UNDERRUNS] = { "device:tx_underruns", "Number of Tx underruns in FIFO queue" }, |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 108 | [TRX_CTR_RX_DROP_EV] = { "device:rx_drop_events", "Number of times Rx samples were dropped by HW" }, |
| 109 | [TRX_CTR_RX_DROP_SMPL] = { "device:rx_drop_samples", "Number of Rx samples dropped by HW" }, |
Pau Espin Pedrol | 68a7809 | 2019-07-29 20:11:25 +0200 | [diff] [blame] | 110 | [TRX_CTR_TX_DROP_EV] = { "device:tx_drop_events", "Number of times Tx samples were dropped by HW" }, |
| 111 | [TRX_CTR_TX_DROP_SMPL] = { "device:tx_drop_samples", "Number of Tx samples dropped by HW" } |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 112 | }; |
| 113 | |
| 114 | static const struct rate_ctr_group_desc trx_chan_ctr_group_desc = { |
| 115 | .group_name_prefix = "trx:chan", |
| 116 | .group_description = "osmo-trx statistics", |
| 117 | .class_id = OSMO_STATS_CLASS_GLOBAL, |
| 118 | .num_ctr = ARRAY_SIZE(trx_chan_ctr_desc), |
| 119 | .ctr_desc = trx_chan_ctr_desc, |
| 120 | }; |
| 121 | |
| 122 | static int rate_ctr_timerfd_cb(struct osmo_fd *ofd, unsigned int what) { |
| 123 | size_t chan; |
| 124 | struct rate_ctr *ctr; |
| 125 | LOGC(DMAIN, NOTICE) << "Main thread is updating counters"; |
| 126 | rate_ctr_mutex.lock(); |
| 127 | for (chan = 0; chan < chan_len; chan++) { |
| 128 | if (ctrs_pending[chan].chan == PENDING_CHAN_NONE) |
| 129 | continue; |
| 130 | LOGCHAN(chan, DMAIN, INFO) << "rate_ctr update"; |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 131 | ctr = &rate_ctrs[chan]->ctr[TRX_CTR_RX_OVERRUNS]; |
| 132 | rate_ctr_add(ctr, ctrs_pending[chan].rx_overruns - ctr->current); |
| 133 | ctr = &rate_ctrs[chan]->ctr[TRX_CTR_TX_UNDERRUNS]; |
| 134 | rate_ctr_add(ctr, ctrs_pending[chan].tx_underruns - ctr->current); |
| 135 | ctr = &rate_ctrs[chan]->ctr[TRX_CTR_RX_DROP_EV]; |
| 136 | rate_ctr_add(ctr, ctrs_pending[chan].rx_dropped_events - ctr->current); |
| 137 | ctr = &rate_ctrs[chan]->ctr[TRX_CTR_RX_DROP_SMPL]; |
| 138 | rate_ctr_add(ctr, ctrs_pending[chan].rx_dropped_samples - ctr->current); |
Pau Espin Pedrol | 68a7809 | 2019-07-29 20:11:25 +0200 | [diff] [blame] | 139 | ctr = &rate_ctrs[chan]->ctr[TRX_CTR_TX_DROP_EV]; |
| 140 | rate_ctr_add(ctr, ctrs_pending[chan].tx_dropped_events - ctr->current); |
| 141 | ctr = &rate_ctrs[chan]->ctr[TRX_CTR_TX_DROP_SMPL]; |
| 142 | rate_ctr_add(ctr, ctrs_pending[chan].tx_dropped_samples - ctr->current); |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 143 | |
| 144 | /* Mark as done */ |
| 145 | ctrs_pending[chan].chan = PENDING_CHAN_NONE; |
| 146 | } |
| 147 | if (osmo_timerfd_disable(&rate_ctr_timerfd) < 0) |
| 148 | LOGC(DMAIN, ERROR) << "Failed to disable timerfd"; |
| 149 | rate_ctr_mutex.unlock(); |
| 150 | return 0; |
| 151 | } |
| 152 | |
| 153 | /* Callback function to be called every time we receive a signal from DEVICE */ |
| 154 | static int device_sig_cb(unsigned int subsys, unsigned int signal, |
| 155 | void *handler_data, void *signal_data) |
| 156 | { |
| 157 | struct device_counters *ctr; |
| 158 | /* Delay sched around 20 ms, in case we receive several calls from several |
| 159 | * channels batched */ |
| 160 | struct timespec next_sched = {.tv_sec = 0, .tv_nsec = 20*1000*1000}; |
| 161 | /* no automatic re-trigger */ |
| 162 | struct timespec intv_sched = {.tv_sec = 0, .tv_nsec = 0}; |
| 163 | |
| 164 | switch (signal) { |
| 165 | case S_DEVICE_COUNTER_CHANGE: |
| 166 | ctr = (struct device_counters *)signal_data; |
| 167 | LOGCHAN(ctr->chan, DMAIN, NOTICE) << "Received counter change from radioDevice"; |
| 168 | rate_ctr_mutex.lock(); |
| 169 | ctrs_pending[ctr->chan] = *ctr; |
| 170 | if (osmo_timerfd_schedule(&rate_ctr_timerfd, &next_sched, &intv_sched) < 0) { |
| 171 | LOGC(DMAIN, ERROR) << "Failed to schedule timerfd: " << errno << " = "<< strerror(errno); |
| 172 | } |
| 173 | rate_ctr_mutex.unlock(); |
| 174 | break; |
| 175 | default: |
| 176 | break; |
| 177 | } |
| 178 | return 0; |
| 179 | } |
| 180 | |
Pau Espin Pedrol | 6a305fe | 2019-05-24 19:58:20 +0200 | [diff] [blame] | 181 | /************************************ |
| 182 | * ctr_threshold APIs |
| 183 | ************************************/ |
| 184 | static const char* ctr_threshold_2_vty_str(struct ctr_threshold *ctr) |
| 185 | { |
| 186 | static char buf[256]; |
| 187 | int rc = 0; |
| 188 | rc += snprintf(buf, sizeof(buf), "ctr-error-threshold %s", get_value_string(trx_chan_ctr_names, ctr->ctr_id)); |
| 189 | rc += snprintf(buf + rc, sizeof(buf) - rc, " %d %s", ctr->val, get_value_string(rate_ctr_intv, ctr->intv)); |
| 190 | return buf; |
| 191 | } |
| 192 | |
| 193 | static void threshold_timer_cb(void *data) |
| 194 | { |
| 195 | struct ctr_threshold *ctr_thr; |
| 196 | struct rate_ctr *rate_ctr; |
| 197 | size_t chan; |
| 198 | LOGC(DMAIN, DEBUG) << "threshold_timer_cb fired!"; |
| 199 | |
| 200 | llist_for_each_entry(ctr_thr, &threshold_list, list) { |
| 201 | for (chan = 0; chan < chan_len; chan++) { |
| 202 | rate_ctr = &rate_ctrs[chan]->ctr[ctr_thr->ctr_id]; |
| 203 | LOGCHAN(chan, DMAIN, INFO) << "checking threshold: " << ctr_threshold_2_vty_str(ctr_thr) |
| 204 | << " ("<< rate_ctr->intv[ctr_thr->intv].rate << " vs " << ctr_thr->val << ")"; |
| 205 | if (rate_ctr->intv[ctr_thr->intv].rate >= ctr_thr->val) { |
| 206 | LOGCHAN(chan, DMAIN, FATAL) << "threshold reached, stopping! " << ctr_threshold_2_vty_str(ctr_thr) |
| 207 | << " ("<< rate_ctr->intv[ctr_thr->intv].rate << " vs " << ctr_thr->val << ")"; |
| 208 | osmo_signal_dispatch(SS_MAIN, S_MAIN_STOP_REQUIRED, NULL); |
| 209 | return; |
| 210 | } |
| 211 | } |
| 212 | } |
| 213 | osmo_timer_schedule(&threshold_timer, threshold_timer_sched_secs, 0); |
| 214 | } |
| 215 | |
| 216 | static size_t ctr_threshold_2_seconds(struct ctr_threshold *ctr) |
| 217 | { |
| 218 | size_t mult = 0; |
| 219 | switch (ctr->intv) { |
| 220 | case RATE_CTR_INTV_SEC: |
| 221 | mult = 1; |
| 222 | break; |
| 223 | case RATE_CTR_INTV_MIN: |
| 224 | mult = 60; |
| 225 | break; |
| 226 | case RATE_CTR_INTV_HOUR: |
| 227 | mult = 60*60; |
| 228 | break; |
| 229 | case RATE_CTR_INTV_DAY: |
| 230 | mult = 60*60*24; |
| 231 | break; |
| 232 | default: |
| 233 | OSMO_ASSERT(false); |
| 234 | } |
| 235 | return mult; |
| 236 | } |
| 237 | |
| 238 | static void threshold_timer_update_intv() { |
| 239 | struct ctr_threshold *ctr, *min_ctr; |
| 240 | size_t secs, min_secs; |
| 241 | |
| 242 | /* Avoid scheduling timer until itself and other structures are prepared |
| 243 | by trx_rate_ctr_init */ |
| 244 | if (!threshold_initied) |
| 245 | return; |
| 246 | |
| 247 | if (llist_empty(&threshold_list)) { |
| 248 | if (osmo_timer_pending(&threshold_timer)) |
| 249 | osmo_timer_del(&threshold_timer); |
| 250 | return; |
| 251 | } |
| 252 | |
| 253 | min_ctr = llist_first_entry(&threshold_list, struct ctr_threshold, list); |
| 254 | min_secs = ctr_threshold_2_seconds(min_ctr); |
| 255 | |
| 256 | llist_for_each_entry(ctr, &threshold_list, list) { |
| 257 | secs = ctr_threshold_2_seconds(ctr); |
| 258 | if( min_secs > secs) |
| 259 | min_secs = secs; |
| 260 | } |
| 261 | |
| 262 | |
| 263 | threshold_timer_sched_secs = OSMO_MAX(min_secs / 2 - 1, 1); |
| 264 | LOGC(DMAIN, INFO) << "New ctr-error-threshold check interval: " |
| 265 | << threshold_timer_sched_secs << " seconds"; |
| 266 | osmo_timer_schedule(&threshold_timer, threshold_timer_sched_secs, 0); |
| 267 | } |
| 268 | |
| 269 | /* Init rate_ctr subsystem. Expected to be called during process start by main thread before VTY is ready */ |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 270 | void trx_rate_ctr_init(void *ctx, struct trx_ctx* trx_ctx) |
| 271 | { |
| 272 | size_t i; |
Pau Espin Pedrol | 6a305fe | 2019-05-24 19:58:20 +0200 | [diff] [blame] | 273 | trx_rate_ctr_ctx = ctx; |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 274 | chan_len = trx_ctx->cfg.num_chans; |
| 275 | ctrs_pending = (struct device_counters*) talloc_zero_size(ctx, chan_len * sizeof(struct device_counters)); |
| 276 | rate_ctrs = (struct rate_ctr_group**) talloc_zero_size(ctx, chan_len * sizeof(struct rate_ctr_group*)); |
| 277 | |
| 278 | for (i = 0; i < chan_len; i++) { |
| 279 | ctrs_pending[i].chan = PENDING_CHAN_NONE; |
| 280 | rate_ctrs[i] = rate_ctr_group_alloc(ctx, &trx_chan_ctr_group_desc, i); |
| 281 | if (!rate_ctrs[i]) { |
| 282 | LOGCHAN(i, DMAIN, ERROR) << "Failed to allocate rate ctr"; |
| 283 | exit(1); |
| 284 | } |
| 285 | } |
| 286 | rate_ctr_timerfd.fd = -1; |
| 287 | if (osmo_timerfd_setup(&rate_ctr_timerfd, rate_ctr_timerfd_cb, NULL) < 0) { |
| 288 | LOGC(DMAIN, ERROR) << "Failed to setup timerfd"; |
| 289 | exit(1); |
| 290 | } |
| 291 | osmo_signal_register_handler(SS_DEVICE, device_sig_cb, NULL); |
Pau Espin Pedrol | 6a305fe | 2019-05-24 19:58:20 +0200 | [diff] [blame] | 292 | |
| 293 | /* Now set up threshold checks */ |
| 294 | threshold_initied = true; |
| 295 | osmo_timer_setup(&threshold_timer, threshold_timer_cb, NULL); |
| 296 | threshold_timer_update_intv(); |
| 297 | } |
| 298 | |
| 299 | void trx_rate_ctr_threshold_add(struct ctr_threshold *ctr) |
| 300 | { |
| 301 | struct ctr_threshold *new_ctr; |
| 302 | |
| 303 | new_ctr = talloc_zero(trx_rate_ctr_ctx, struct ctr_threshold); |
| 304 | *new_ctr = *ctr; |
| 305 | LOGC(DMAIN, NOTICE) << "Adding new threshold check: " << ctr_threshold_2_vty_str(new_ctr); |
| 306 | llist_add(&new_ctr->list, &threshold_list); |
| 307 | threshold_timer_update_intv(); |
| 308 | } |
| 309 | |
| 310 | int trx_rate_ctr_threshold_del(struct ctr_threshold *del_ctr) |
| 311 | { |
| 312 | struct ctr_threshold *ctr; |
| 313 | |
| 314 | llist_for_each_entry(ctr, &threshold_list, list) { |
| 315 | if (ctr->intv != del_ctr->intv || |
| 316 | ctr->ctr_id != del_ctr->ctr_id || |
| 317 | ctr->val != del_ctr->val) |
| 318 | continue; |
| 319 | |
| 320 | LOGC(DMAIN, NOTICE) << "Deleting threshold check: " << ctr_threshold_2_vty_str(del_ctr); |
| 321 | llist_del(&ctr->list); |
| 322 | talloc_free(ctr); |
| 323 | threshold_timer_update_intv(); |
| 324 | return 0; |
| 325 | } |
| 326 | return -1; |
| 327 | } |
| 328 | |
| 329 | void trx_rate_ctr_threshold_write_config(struct vty *vty, char *indent_prefix) |
| 330 | { |
| 331 | struct ctr_threshold *ctr; |
| 332 | |
| 333 | llist_for_each_entry(ctr, &threshold_list, list) { |
| 334 | vty_out(vty, "%s%s%s", indent_prefix, ctr_threshold_2_vty_str(ctr), VTY_NEWLINE); |
| 335 | } |
Pau Espin Pedrol | 4456b6f | 2019-05-24 16:54:19 +0200 | [diff] [blame] | 336 | } |