Fix use-after-free by tun thread after tun obj destroyed

The main thread calls pthread_cancel before freeing the tun object.
However, pthread_cancel doesn't kill the thread synchronously (man
pthread_cancel). Hence, the tun thread may still be running for a while
after the tun object is being freed or has already been freed.
Let's avoid this by making sure the thread is stopped before
freeing the object.
To accomplish it, we must wait for the thread to be cancelled. A cleanup
routine is added which will signal the "tun_released" message to the main
thread through an osmo_itq, which will then free the object (since
talloc context is managed by the main thread).

Related: SYS#5523
Change-Id: Idf005359afb41d3413b09281a9ff937d5eafcc7c
diff --git a/daemon/daemon_vty.c b/daemon/daemon_vty.c
index 3c44ed1..cfbe421 100644
--- a/daemon/daemon_vty.c
+++ b/daemon/daemon_vty.c
@@ -99,7 +99,7 @@
 		vty_out(vty, "Cannot destrory non-existant TUN%s", VTY_NEWLINE);
 		return CMD_WARNING;
 	}
-	_tun_device_deref_destroy(tun);
+	_tun_device_deref_release(tun);
 	pthread_rwlock_unlock(&g_daemon->rwlock);
 
 	return CMD_SUCCESS;
diff --git a/daemon/internal.h b/daemon/internal.h
index 09ba52e..fc5708d 100644
--- a/daemon/internal.h
+++ b/daemon/internal.h
@@ -7,6 +7,7 @@
 #include <sys/socket.h>
 #include <osmocom/core/linuxlist.h>
 #include <osmocom/core/write_queue.h>
+#include <osmocom/core/it_q.h>
 #include <osmocom/core/utils.h>
 
 struct nl_sock;
@@ -84,6 +85,13 @@
 /***********************************************************************
  * TUN Device
  ***********************************************************************/
+/* Message sent tun thread -> main thread through osmo_itq */
+struct gtp_daemon_itq_msg {
+	struct llist_head list;
+	struct {
+		struct tun_device *tun;
+	} tun_released; /* tun became stopped and can be freed */
+};
 
 struct tun_device {
 	/* entry in global list */
@@ -110,6 +118,9 @@
 
 	/* the thread handling Rx from the tun fd */
 	pthread_t thread;
+
+	/* Used to store messages to be sent to main thread, since tun thread doesn't allocate through talloc */
+	struct gtp_daemon_itq_msg itq_msg;
 };
 
 struct tun_device *
@@ -121,9 +132,10 @@
 struct tun_device *
 _tun_device_find(struct gtp_daemon *d, const char *devname);
 
-void _tun_device_deref_destroy(struct tun_device *tun);
+void _tun_device_destroy(struct tun_device *tun);
 
 bool _tun_device_release(struct tun_device *tun);
+void _tun_device_deref_release(struct tun_device *tun);
 
 bool tun_device_release(struct tun_device *tun);
 
@@ -222,6 +234,12 @@
 	struct osmo_stream_srv_link *cups_link;
 	struct osmo_signalfd *signalfd;
 
+	/* inter-thread queue between main thread and workers, pass struct gtp_daemon_itq_msg: */
+	struct osmo_it_q *itq;
+
+	/* Number of tun devices in the process of being released: */
+	unsigned int reset_all_state_tun_remaining;
+
 	struct {
 		char *cups_local_ip;
 		uint16_t cups_local_port;
diff --git a/daemon/main.c b/daemon/main.c
index 76aeab5..014e28a 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -58,6 +58,7 @@
 	/* client socket */
 	struct osmo_stream_srv *srv;
 	char sockname[OSMO_SOCK_NAME_MAXLEN];
+	bool reset_all_state_res_pending;
 };
 
 struct subprocess {
@@ -493,8 +494,12 @@
 		subprocess_destroy(p, SIGKILL);
 	}
 
-	jres = gen_uecups_result("reset_all_state_res", "OK");
-	cups_client_tx_json(cc, jres);
+	if (d->reset_all_state_tun_remaining == 0) {
+		jres = gen_uecups_result("reset_all_state_res", "OK");
+		cups_client_tx_json(cc, jres);
+	} else {
+		cc->reset_all_state_res_pending = true;
+	}
 
 	return 0;
 }
@@ -669,6 +674,31 @@
 	}
 }
 
+static void gtp_daemon_itq_read_cb(struct osmo_it_q *q, struct llist_head *item)
+{
+	struct gtp_daemon *d = (struct gtp_daemon *)q->data;
+	struct gtp_daemon_itq_msg *itq_msg = container_of(item, struct gtp_daemon_itq_msg, list);
+
+	LOGP(DTUN, LOGL_DEBUG, "Rx new itq message from %s\n",
+		 itq_msg->tun_released.tun->devname);
+
+	_tun_device_destroy(itq_msg->tun_released.tun);
+	if (d->reset_all_state_tun_remaining > 0) {
+		d->reset_all_state_tun_remaining--;
+		if (d->reset_all_state_tun_remaining == 0) {
+			struct cups_client *cc;
+			llist_for_each_entry(cc, &d->cups_clients, list) {
+				json_t *jres;
+				if (!cc->reset_all_state_res_pending)
+					continue;
+				cc->reset_all_state_res_pending = false;
+				jres = gen_uecups_result("reset_all_state_res", "OK");
+				cups_client_tx_json(cc, jres);
+			}
+		}
+	}
+}
+
 static struct gtp_daemon *gtp_daemon_alloc(void *ctx)
 {
 	struct gtp_daemon *d = talloc_zero(ctx, struct gtp_daemon);
@@ -682,6 +712,9 @@
 	pthread_rwlock_init(&d->rwlock, NULL);
 	d->main_thread = pthread_self();
 
+	d->itq = osmo_it_q_alloc(d, "itq", 4096, gtp_daemon_itq_read_cb, d);
+	osmo_fd_register(&d->itq->event_ofd);
+
 	INIT_LLIST_HEAD(&d->cups_clients);
 
 	d->cfg.cups_local_ip = talloc_strdup(d, "localhost");
diff --git a/daemon/tun_device.c b/daemon/tun_device.c
index 7d1948f..5993856 100644
--- a/daemon/tun_device.c
+++ b/daemon/tun_device.c
@@ -120,6 +120,14 @@
 	return 0;
 }
 
+static void tun_device_pthread_cleanup_routine(void *data)
+{
+	struct tun_device *tun = data;
+	LOGTUN(tun, LOGL_DEBUG, "pthread_cleanup\n");
+	int rc = osmo_it_q_enqueue(tun->d->itq, &tun->itq_msg, list);
+	OSMO_ASSERT(rc == 0);
+}
+
 /* one thread for reading from each TUN device (TUN -> GTP encapsulation) */
 static void *tun_device_thread(void *arg)
 {
@@ -136,6 +144,8 @@
 	gtph->flags = 0x30;
 	gtph->type = GTP_TPDU;
 
+	pthread_cleanup_push(tun_device_pthread_cleanup_routine, tun);
+
 	while (1) {
 		struct gtp_tunnel *t;
 		struct pkt_info pinfo;
@@ -187,6 +197,7 @@
 			exit(1);
 		}
 	}
+	pthread_cleanup_pop(1);
 }
 
 static int tun_open(int flags, const char *name)
@@ -376,24 +387,24 @@
 	return tun;
 }
 
-/* UNLOCKED hard/forced destroy; caller must make sure references are cleaned up */
-static void _tun_device_destroy(struct tun_device *tun)
+/* UNLOCKED hard/forced destroy; caller must make sure references are cleaned
+ * up, and tun thread is stopped beforehand by calling
+ * _tun_device_{deref_}release */
+void _tun_device_destroy(struct tun_device *tun)
 {
 	/* talloc is not thread safe, all alloc/free must come from main thread */
 	ASSERT_MAIN_THREAD(tun->d);
+	LOGTUN(tun, LOGL_INFO, "Destroying\n");
 
-	pthread_cancel(tun->thread);
-	llist_del(&tun->list);
 	if (tun->netns_name)
 		close(tun->netns_fd);
 	close(tun->fd);
 	nl_socket_free(tun->nl);
-	LOGTUN(tun, LOGL_INFO, "Destroying\n");
 	talloc_free(tun);
 }
 
-/* UNLOCKED remove all objects referencing this tun and then destroy */
-void _tun_device_deref_destroy(struct tun_device *tun)
+/* UNLOCKED remove all objects referencing this tun and then start async tun release procedure */
+void _tun_device_deref_release(struct tun_device *tun)
 {
 	struct gtp_daemon *d = tun->d;
 	char *devname = talloc_strdup(d, tun->devname);
@@ -412,12 +423,12 @@
 	 * check if the tun can still be found in the list */
 	tun2 = _tun_device_find(d, devname);
 	if (tun2 && tun2 == tun)
-		_tun_device_destroy(tun2);
+		_tun_device_release(tun2);
 
 	talloc_free(devname);
 }
 
-/* UNLOCKED release a reference; destroy if refcount drops to 0 */
+/* UNLOCKED release a reference; start async tun release procedure if refcount drops to 0 */
 bool _tun_device_release(struct tun_device *tun)
 {
 	bool released = false;
@@ -427,10 +438,17 @@
 
 	tun->use_count--;
 	if (tun->use_count == 0) {
-		_tun_device_destroy(tun);
+		LOGTUN(tun, LOGL_INFO, "Releasing\n");
+		llist_del(&tun->list);
+		tun->itq_msg.tun_released.tun = tun;
+		tun->d->reset_all_state_tun_remaining++;
+		/* We cancel the thread: the pthread_cleanup routine will send a message
+		 * back to us (main thread) once the thread is finally cancelled. */
+		pthread_cancel(tun->thread);
 		released = true;
-	} else
+	} else {
 		LOGTUN(tun, LOGL_DEBUG, "Release; new use_count=%lu\n", tun->use_count);
+	}
 
 	return released;
 }