Fix use-after-free by tun thread after tun obj destroyed
The main thread calls pthread_cancel before freeing the tun object.
However, pthread_cancel doesn't kill the thread synchronously (man
pthread_cancel). Hence, the tun thread may still be running for a while
after the tun object is/has been(ing) freed.
Let's avoid this by making sure the thread is stopped before
freeing the object.
To accomplish it, we must wait for the thread to be cancelled. A cleanup
routie is added which will signal the "tun_released" message to the main
thread through an osmo_itq, which will then free the object (since
talloc context is managed by the main thread).
Related: SYS#5523
Change-Id: Idf005359afb41d3413b09281a9ff937d5eafcc7c
diff --git a/daemon/daemon_vty.c b/daemon/daemon_vty.c
index 3c44ed1..cfbe421 100644
--- a/daemon/daemon_vty.c
+++ b/daemon/daemon_vty.c
@@ -99,7 +99,7 @@
vty_out(vty, "Cannot destrory non-existant TUN%s", VTY_NEWLINE);
return CMD_WARNING;
}
- _tun_device_deref_destroy(tun);
+ _tun_device_deref_release(tun);
pthread_rwlock_unlock(&g_daemon->rwlock);
return CMD_SUCCESS;
diff --git a/daemon/internal.h b/daemon/internal.h
index 09ba52e..fc5708d 100644
--- a/daemon/internal.h
+++ b/daemon/internal.h
@@ -7,6 +7,7 @@
#include <sys/socket.h>
#include <osmocom/core/linuxlist.h>
#include <osmocom/core/write_queue.h>
+#include <osmocom/core/it_q.h>
#include <osmocom/core/utils.h>
struct nl_sock;
@@ -84,6 +85,13 @@
/***********************************************************************
* TUN Device
***********************************************************************/
+/* Message sent tun thread -> main thread through osmo_itq */
+struct gtp_daemon_itq_msg {
+ struct llist_head list;
+ struct {
+ struct tun_device *tun;
+ } tun_released; /* tun became stopped and can be freed */
+};
struct tun_device {
/* entry in global list */
@@ -110,6 +118,9 @@
/* the thread handling Rx from the tun fd */
pthread_t thread;
+
+ /* Used to store messages to be sent to main thread, since tun thread doesn't allocate through talloc */
+ struct gtp_daemon_itq_msg itq_msg;
};
struct tun_device *
@@ -121,9 +132,10 @@
struct tun_device *
_tun_device_find(struct gtp_daemon *d, const char *devname);
-void _tun_device_deref_destroy(struct tun_device *tun);
+void _tun_device_destroy(struct tun_device *tun);
bool _tun_device_release(struct tun_device *tun);
+void _tun_device_deref_release(struct tun_device *tun);
bool tun_device_release(struct tun_device *tun);
@@ -222,6 +234,12 @@
struct osmo_stream_srv_link *cups_link;
struct osmo_signalfd *signalfd;
+ /* inter-thread queue between main thread and workers, pass struct gtp_daemon_itq_msg: */
+ struct osmo_it_q *itq;
+
+ /* Number of tunnels in progrress of being released: */
+ unsigned int reset_all_state_tun_remaining;
+
struct {
char *cups_local_ip;
uint16_t cups_local_port;
diff --git a/daemon/main.c b/daemon/main.c
index 76aeab5..014e28a 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -58,6 +58,7 @@
/* client socket */
struct osmo_stream_srv *srv;
char sockname[OSMO_SOCK_NAME_MAXLEN];
+ bool reset_all_state_res_pending;
};
struct subprocess {
@@ -493,8 +494,12 @@
subprocess_destroy(p, SIGKILL);
}
- jres = gen_uecups_result("reset_all_state_res", "OK");
- cups_client_tx_json(cc, jres);
+ if (d->reset_all_state_tun_remaining == 0) {
+ jres = gen_uecups_result("reset_all_state_res", "OK");
+ cups_client_tx_json(cc, jres);
+ } else {
+ cc->reset_all_state_res_pending = true;
+ }
return 0;
}
@@ -669,6 +674,31 @@
}
}
+static void gtp_daemon_itq_read_cb(struct osmo_it_q *q, struct llist_head *item)
+{
+ struct gtp_daemon *d = (struct gtp_daemon *)q->data;
+ struct gtp_daemon_itq_msg *itq_msg = container_of(item, struct gtp_daemon_itq_msg, list);
+
+ LOGP(DTUN, LOGL_DEBUG, "Rx new itq message from %s\n",
+ itq_msg->tun_released.tun->devname);
+
+ _tun_device_destroy(itq_msg->tun_released.tun);
+ if (d->reset_all_state_tun_remaining > 0) {
+ d->reset_all_state_tun_remaining--;
+ if (d->reset_all_state_tun_remaining == 0) {
+ struct cups_client *cc;
+ llist_for_each_entry(cc, &d->cups_clients, list) {
+ json_t *jres;
+ if (!cc->reset_all_state_res_pending)
+ continue;
+ cc->reset_all_state_res_pending = false;
+ jres = gen_uecups_result("reset_all_state_res", "OK");
+ cups_client_tx_json(cc, jres);
+ }
+ }
+ }
+}
+
static struct gtp_daemon *gtp_daemon_alloc(void *ctx)
{
struct gtp_daemon *d = talloc_zero(ctx, struct gtp_daemon);
@@ -682,6 +712,9 @@
pthread_rwlock_init(&d->rwlock, NULL);
d->main_thread = pthread_self();
+ d->itq = osmo_it_q_alloc(d, "itq", 4096, gtp_daemon_itq_read_cb, d);
+ osmo_fd_register(&d->itq->event_ofd);
+
INIT_LLIST_HEAD(&d->cups_clients);
d->cfg.cups_local_ip = talloc_strdup(d, "localhost");
diff --git a/daemon/tun_device.c b/daemon/tun_device.c
index 7d1948f..5993856 100644
--- a/daemon/tun_device.c
+++ b/daemon/tun_device.c
@@ -120,6 +120,14 @@
return 0;
}
+static void tun_device_pthread_cleanup_routine(void *data)
+{
+ struct tun_device *tun = data;
+ LOGTUN(tun, LOGL_DEBUG, "pthread_cleanup\n");
+ int rc = osmo_it_q_enqueue(tun->d->itq, &tun->itq_msg, list);
+ OSMO_ASSERT(rc == 0);
+}
+
/* one thread for reading from each TUN device (TUN -> GTP encapsulation) */
static void *tun_device_thread(void *arg)
{
@@ -136,6 +144,8 @@
gtph->flags = 0x30;
gtph->type = GTP_TPDU;
+ pthread_cleanup_push(tun_device_pthread_cleanup_routine, tun);
+
while (1) {
struct gtp_tunnel *t;
struct pkt_info pinfo;
@@ -187,6 +197,7 @@
exit(1);
}
}
+ pthread_cleanup_pop(1);
}
static int tun_open(int flags, const char *name)
@@ -376,24 +387,24 @@
return tun;
}
-/* UNLOCKED hard/forced destroy; caller must make sure references are cleaned up */
-static void _tun_device_destroy(struct tun_device *tun)
+/* UNLOCKED hard/forced destroy; caller must make sure references are cleaned
+ * up, and tun thread is stopped beforehand by calling
+ * _tun_device_{deref_}release */
+void _tun_device_destroy(struct tun_device *tun)
{
/* talloc is not thread safe, all alloc/free must come from main thread */
ASSERT_MAIN_THREAD(tun->d);
+ LOGTUN(tun, LOGL_INFO, "Destroying\n");
- pthread_cancel(tun->thread);
- llist_del(&tun->list);
if (tun->netns_name)
close(tun->netns_fd);
close(tun->fd);
nl_socket_free(tun->nl);
- LOGTUN(tun, LOGL_INFO, "Destroying\n");
talloc_free(tun);
}
-/* UNLOCKED remove all objects referencing this tun and then destroy */
-void _tun_device_deref_destroy(struct tun_device *tun)
+/* UNLOCKED remove all objects referencing this tun and then start async tun release procedure */
+void _tun_device_deref_release(struct tun_device *tun)
{
struct gtp_daemon *d = tun->d;
char *devname = talloc_strdup(d, tun->devname);
@@ -412,12 +423,12 @@
* check if the tun can still be found in the list */
tun2 = _tun_device_find(d, devname);
if (tun2 && tun2 == tun)
- _tun_device_destroy(tun2);
+ _tun_device_release(tun2);
talloc_free(devname);
}
-/* UNLOCKED release a reference; destroy if refcount drops to 0 */
+/* UNLOCKED release a reference; start async tun release procedure if refcount drops to 0 */
bool _tun_device_release(struct tun_device *tun)
{
bool released = false;
@@ -427,10 +438,17 @@
tun->use_count--;
if (tun->use_count == 0) {
- _tun_device_destroy(tun);
+ LOGTUN(tun, LOGL_INFO, "Releasing\n");
+ llist_del(&tun->list);
+ tun->itq_msg.tun_released.tun = tun;
+ tun->d->reset_all_state_tun_remaining++;
+ /* We cancel the thread: the pthread_cleanup routing will send a message
+ * back to us (main thread) when finally cancelled. */
+ pthread_cancel(tun->thread);
released = true;
- } else
+ } else {
LOGTUN(tun, LOGL_DEBUG, "Release; new use_count=%lu\n", tun->use_count);
+ }
return released;
}