sgs_iface: detect and react to VLR/HLR failure

The HLR (which is connected via the GSUP interface) may fail and
disconnect. On the next location update the VLR will try to talk to the
HLR and fail. This failure event is not communicated towards the SGs
related code and the SGs-association will remain in the LA-PRESENT state
forever. Let's add code to report the problem to the SGs code and
trigger a RESET on the SGs interface.

- Add a flag to report an HLR problem back to the SGs code
- Fix the FSM that controls the reset
- Make sure all SGs associations are reset when the failure occurs.

Change-Id: Icc7df92879728bc98c85fc1d5d8b4c6246501b12
Related: OS#3859
diff --git a/src/libmsc/sgs_iface.c b/src/libmsc/sgs_iface.c
index d83a730..53ab853 100644
--- a/src/libmsc/sgs_iface.c
+++ b/src/libmsc/sgs_iface.c
@@ -376,15 +376,26 @@
 	unsigned int new_id_len = 0;
 	uint8_t resp_msg_type;
 
+	/* Determine message type that is sent next (needed for logging) */
 	if (response->accepted)
 		resp_msg_type = SGSAP_MSGT_LOC_UPD_ACK;
+	else if (response->error)
+		resp_msg_type = SGSAP_MSGT_RESET_IND;
 	else
 		resp_msg_type = SGSAP_MSGT_LOC_UPD_REJ;
 
+	/* Determine MME */
 	mme = sgs_mme_ctx_by_vsub(vsub, resp_msg_type);
 	if (!mme)
 		return;
 
+	/* Handle error (HLR failure) */
+	if (response->error) {
+		osmo_fsm_inst_dispatch(mme->fi, SGS_VLRR_E_START_RESET, NULL);
+		return;
+	}
+
+	/* Handle LU accept/reject */
 	if (response->accepted) {
 		if (vsub->tmsi_new != GSM_RESERVED_TMSI) {
 			new_id_len = gsm48_generate_mid_from_tmsi(new_id, vsub->tmsi_new);
@@ -1120,6 +1131,10 @@
 		reset_params.vlr_name_present = true;
 		reset_ind = gsm29118_create_reset_ind(&reset_params);
 		sgs_tx(sgc, reset_ind);
+
+		/* Perform a reset of the SGS FSM of all subscribers that are present in the VLR */
+		vlr_sgs_reset(gsm_network->vlr);
+
 		osmo_fsm_inst_state_chg(fi, SGS_VLRR_ST_WAIT_ACK, sgs->cfg.timer[SGS_STATE_TS11], 11);
 		break;
 	default:
@@ -1187,6 +1202,7 @@
 static struct osmo_fsm sgs_vlr_reset_fsm = {
 	.name = "SGs-VLR-RESET",
 	.states = sgs_vlr_reset_fsm_states,
+	.num_states = ARRAY_SIZE(sgs_vlr_reset_fsm_states),
 	.allstate_event_mask = S(SGS_VLRR_E_START_RESET),
 	.allstate_action = sgs_vlr_reset_fsm_allstate,
 	.timer_cb = sgs_vlr_reset_fsm_timer_cb,
diff --git a/src/libvlr/vlr.c b/src/libvlr/vlr.c
index b156b43..2753096 100644
--- a/src/libvlr/vlr.c
+++ b/src/libvlr/vlr.c
@@ -854,7 +854,7 @@
 static int vlr_subscr_handle_lu_res(struct vlr_subscr *vsub,
 				    const struct osmo_gsup_message *gsup)
 {
-	struct sgs_lu_response sgs_lu_response;
+	struct sgs_lu_response sgs_lu_response = {0};
 	bool sgs_lu_in_progress = false;
 
 	if (vsub->sgs_fsm->state == SGS_UE_ST_LA_UPD_PRES)
@@ -885,7 +885,7 @@
 static int vlr_subscr_handle_lu_err(struct vlr_subscr *vsub,
 				    const struct osmo_gsup_message *gsup)
 {
-	struct sgs_lu_response sgs_lu_response;
+	struct sgs_lu_response sgs_lu_response = {0};
 	bool sgs_lu_in_progress = false;
 
 	if (vsub->sgs_fsm->state == SGS_UE_ST_LA_UPD_PRES)
diff --git a/src/libvlr/vlr_sgs_fsm.c b/src/libvlr/vlr_sgs_fsm.c
index 13639ca..49ad09a 100644
--- a/src/libvlr/vlr_sgs_fsm.c
+++ b/src/libvlr/vlr_sgs_fsm.c
@@ -48,24 +48,6 @@
 	{0, NULL}
 };
 
-/* Initiate location update and change to SGS_UE_ST_LA_UPD_PRES state */
-static void perform_lu(struct osmo_fsm_inst *fi)
-{
-	struct vlr_subscr *vsub = fi->priv;
-	int rc;
-	osmo_fsm_inst_state_chg(fi, SGS_UE_ST_LA_UPD_PRES, 0, 0);
-	vsub->ms_not_reachable_flag = false;
-
-	/* Note: At the moment we allocate a new TMSI on each LU. */
-	rc = vlr_subscr_alloc_tmsi(vsub);
-	if (rc != 0)
-		LOGPFSML(fi, LOGL_ERROR, "(sub %s) VLR LU tmsi allocation failed\n", vlr_subscr_name(vsub));
-
-	rc = vlr_subscr_req_lu(vsub);
-	if (rc != 0)
-		LOGPFSML(fi, LOGL_ERROR, "(sub %s) HLR LU request failed\n", vlr_subscr_name(vsub));
-}
-
 /* Send the SGs Association to NULL state immediately */
 static void to_null(struct osmo_fsm_inst *fi)
 {
@@ -86,6 +68,37 @@
 		osmo_timer_del(&vsub->sgs.Ts5);
 }
 
+/* Initiate location update and change to SGS_UE_ST_LA_UPD_PRES state */
+static void perform_lu(struct osmo_fsm_inst *fi)
+{
+	struct vlr_subscr *vsub = fi->priv;
+	struct sgs_lu_response sgs_lu_response = {0};
+	int rc;
+
+	/* Note: At the moment we allocate a new TMSI on each LU. */
+	rc = vlr_subscr_alloc_tmsi(vsub);
+	if (rc != 0) {
+		LOGPFSML(fi, LOGL_ERROR, "(sub %s) VLR LU tmsi allocation failed\n", vlr_subscr_name(vsub));
+		goto error;
+	}
+
+	rc = vlr_subscr_req_lu(vsub);
+	if (rc != 0) {
+		LOGPFSML(fi, LOGL_ERROR, "(sub %s) HLR LU request failed\n", vlr_subscr_name(vsub));
+		goto error;
+	}
+
+	osmo_fsm_inst_state_chg(fi, SGS_UE_ST_LA_UPD_PRES, 0, 0);
+	vsub->ms_not_reachable_flag = false;
+	return;
+
+error:
+	to_null(fi);
+	sgs_lu_response.error = true;
+	sgs_lu_response.vsub = vsub;
+	vsub->sgs.response_cb(&sgs_lu_response);
+}
+
 /* Respawn a pending paging (Timer is reset and a new paging request is sent) */
 static void respawn_paging(struct vlr_subscr *vsub)
 {