Inactive connection cleanup (disabled by default)

Add a watchdog timer to connections, and close these connections when
the watchdog timer expires. Kick the watchdog whenever RTP messages or
the relevant MGCP messages arrive. Add the currently remaining timeout
to "show mgcp stats" in the VTY.

This feature is disabled by default, as it is incompatible with LCLS
(connections in LCLS state appear to be inactive). Enable it with the
new "conn-timeout" VTY setting. In general, this feature can be used to
work around interoperability problems causing connections to stay open
forever, and slowly exhausting all available ports. This has already
happened for various reasons.

MDCX is the only relevant MGCP message:
- CRCX creates the conn and timer
- DLCX deletes the conn and timer
- MDCX is the only remaining supported MGCP message that indicates a CI
- A CI can't easily be parsed generically for all MGCP messages;
  parsing is done in handle_modify_con().

Related: OS#3429
Change-Id: I18886052e090466f73829133c24f011806cc1fe0
diff --git a/src/libosmo-mgcp/mgcp_conn.c b/src/libosmo-mgcp/mgcp_conn.c
index fce8a78..a8341d6 100644
--- a/src/libosmo-mgcp/mgcp_conn.c
+++ b/src/libosmo-mgcp/mgcp_conn.c
@@ -29,6 +29,7 @@
 #include <osmocom/mgcp/mgcp_codec.h>
 #include <osmocom/gsm/gsm_utils.h>
 #include <osmocom/core/rate_ctr.h>
+#include <osmocom/core/timer.h>
 #include <ctype.h>
 
 const static struct rate_ctr_group_desc rate_ctr_group_desc = {
@@ -125,6 +126,23 @@
 	rate_ctr_group_free(conn_rtp->rate_ctr_group);
 }
 
+void mgcp_conn_watchdog_cb(void *data)
+{
+	struct mgcp_conn *idle = data; /* timer payload: the connection this watchdog was set up for */
+	LOGP(DLMGCP, LOGL_ERROR, "endpoint:0x%x CI:%s connection timed out!\n", ENDPOINT_NUMBER(idle->endp), idle->id);
+	mgcp_conn_free(idle->endp, idle->id); /* no kick arrived within the timeout: close the connection */
+}
+
+void mgcp_conn_watchdog_kick(struct mgcp_conn *conn)
+{
+	/* Re-arm the watchdog; cancel first, so that a timeout which is disabled (0) leaves no stale expiry pending */
+	osmo_timer_del(&conn->watchdog);
+	if (!conn->endp->cfg->conn_timeout)
+		return;
+	LOGP(DLMGCP, LOGL_DEBUG, "endpoint:0x%x CI:%s watchdog kicked\n", ENDPOINT_NUMBER(conn->endp), conn->id);
+	osmo_timer_schedule(&conn->watchdog, conn->endp->cfg->conn_timeout, 0);
+}
+
 /*! allocate a new connection list entry.
  *  \param[in] ctx talloc context
  *  \param[in] endp associated endpoint
@@ -167,6 +185,9 @@
 		OSMO_ASSERT(false);
 	}
 
+	/* Initialize watchdog */
+	osmo_timer_setup(&conn->watchdog, mgcp_conn_watchdog_cb, conn);
+	mgcp_conn_watchdog_kick(conn);
 	llist_add(&conn->entry, &endp->conns);
 
 	return conn;
@@ -274,6 +295,7 @@
 		OSMO_ASSERT(false);
 	}
 
+	osmo_timer_del(&conn->watchdog);
 	llist_del(&conn->entry);
 	talloc_free(conn);
 }
diff --git a/src/libosmo-mgcp/mgcp_network.c b/src/libosmo-mgcp/mgcp_network.c
index 7af8e71..2c86f8f 100644
--- a/src/libosmo-mgcp/mgcp_network.c
+++ b/src/libosmo-mgcp/mgcp_network.c
@@ -1246,6 +1246,8 @@
 	if (len < 0)
 		return -1;
 
+	mgcp_conn_watchdog_kick(conn_src->conn);
+
 	/* Check if the connection is in loopback mode, if yes, just send the
 	 * incoming data back to the origin */
 	if (conn_src->conn->mode == MGCP_CONN_LOOPBACK) {
diff --git a/src/libosmo-mgcp/mgcp_protocol.c b/src/libosmo-mgcp/mgcp_protocol.c
index f141485..9f95ea4 100644
--- a/src/libosmo-mgcp/mgcp_protocol.c
+++ b/src/libosmo-mgcp/mgcp_protocol.c
@@ -1141,6 +1141,8 @@
 		return create_err_response(endp, 400, "MDCX", p->trans);
 	}
 
+	mgcp_conn_watchdog_kick(conn->conn);
+
 	if (mode) {
 		if (mgcp_parse_conn_mode(mode, endp, conn->conn) != 0) {
 			rate_ctr_inc(&rate_ctrs->ctr[MGCP_MDCX_FAIL_INVALID_MODE]);
diff --git a/src/libosmo-mgcp/mgcp_vty.c b/src/libosmo-mgcp/mgcp_vty.c
index 83f845a..ef63b04 100644
--- a/src/libosmo-mgcp/mgcp_vty.c
+++ b/src/libosmo-mgcp/mgcp_vty.c
@@ -154,6 +154,10 @@
 		vty_out(vty, "  osmux dummy %s%s",
 			g_cfg->osmux_dummy ? "on" : "off", VTY_NEWLINE);
 	}
+
+	if (g_cfg->conn_timeout)
+		vty_out(vty, "  conn-timeout %u%s", g_cfg->conn_timeout, VTY_NEWLINE);
+
 	return CMD_SUCCESS;
 }
 
@@ -215,6 +219,13 @@
 		vty_out(vty, "   CONN: %s%s", mgcp_conn_dump(conn), VTY_NEWLINE);
 
 		if (show_stats) {
+			if (endp->cfg->conn_timeout) {
+				struct timeval remaining;
+				osmo_timer_remaining(&conn->watchdog, NULL, &remaining);
+				vty_out(vty, "   Currently remaining timeout (seconds): %d.%06d%s",
+					(int)remaining.tv_sec, (int)remaining.tv_usec, VTY_NEWLINE);
+			}
+
 			/* FIXME: Also add verbosity for other
 			 * connection types (E1) as soon as
 			 * the implementation is available */
@@ -1327,6 +1338,18 @@
 	return CMD_SUCCESS;
 }
 
+DEFUN(cfg_mgcp_conn_timeout,
+      cfg_mgcp_conn_timeout_cmd,
+      "conn-timeout <0-65534>",
+      "Set a time after which inactive connections (CIs) are closed (0 = disabled; only reliable for connections"
+      " created afterwards). Works around interoperability problems that leave connections open forever and slowly"
+      " exhaust all ports. Do not enable when LCLS is used (connections in LCLS state appear to be inactive)!\n"
+      "Timeout value (sec.), 0 to disable\n")
+{
+	g_cfg->conn_timeout = strtoul(argv[0], NULL, 10); /* 0 means "disabled", see mgcp_conn_watchdog_kick() */
+	return CMD_SUCCESS;
+}
+
 int mgcp_vty_init(void)
 {
 	install_element_ve(&show_mgcp_cmd);
@@ -1391,6 +1414,7 @@
 	install_element(MGCP_NODE, &cfg_mgcp_allow_transcoding_cmd);
 	install_element(MGCP_NODE, &cfg_mgcp_no_allow_transcoding_cmd);
 	install_element(MGCP_NODE, &cfg_mgcp_domain_cmd);
+	install_element(MGCP_NODE, &cfg_mgcp_conn_timeout_cmd);
 
 	install_element(MGCP_NODE, &cfg_mgcp_trunk_cmd);
 	install_node(&trunk_node, config_write_trunk);