Add special 7-bit encoding and decoding functions for USSD coding

Handling 7-bit coding is a little different for USSD, as TS 03.38
states:

To avoid the situation where the receiving entity confuses 7 binary
zero pad bits as the @ character, the carriage return or <CR>
character shall be used for padding in this situation [...].

If <CR> is intended to be the last character and the message
(including the wanted <CR>) ends on an octet boundary, then another
<CR> must be added together with a padding bit 0. The receiving entity
will perform the carriage return function twice, but this will not
result in misoperation as the definition of <CR> [...] is identical to
the definition of <CR><CR>.

The receiving entity shall remove the final <CR> character where the
message ends on an octet boundary with <CR> as the last character.

Jacob has verified the fix with fakeBTS and the wireshark dissector.

Fixes: OW#947
Reviewed-by: Jacob Erlbeck <jerlbeck@sysmocom.de>
diff --git a/include/osmocom/gsm/gsm_utils.h b/include/osmocom/gsm/gsm_utils.h
index a572f50..6cd46e4 100644
--- a/include/osmocom/gsm/gsm_utils.h
+++ b/include/osmocom/gsm/gsm_utils.h
@@ -57,8 +57,10 @@
 enum gsm_band gsm_band_parse(const char *mhz);
 
 int gsm_7bit_decode(char *decoded, const uint8_t *user_data, uint8_t length);
+int gsm_7bit_decode_ussd(char *decoded, const uint8_t *user_data, uint8_t length);
 int gsm_7bit_decode_hdr(char *decoded, const uint8_t *user_data, uint8_t length, uint8_t ud_hdr_ind);
 int gsm_7bit_encode(uint8_t *result, const char *data);
+int gsm_7bit_encode_ussd(uint8_t *result, const char *data, int *octets_written);
 int gsm_7bit_encode_oct(uint8_t *result, const char *data, int *octets_written);
 
 /* the three functions below are helper functions and here for the unit test */
diff --git a/src/gsm/gsm0480.c b/src/gsm/gsm0480.c
index b9b3ed9..cc693fe 100644
--- a/src/gsm/gsm0480.c
+++ b/src/gsm/gsm0480.c
@@ -105,7 +105,7 @@
 	msgb_put_u8(msg, ASN1_OCTET_STRING_TAG);
 	ussd_len_ptr = msgb_put(msg, 1);
 	data = msgb_put(msg, 0);
-	len = gsm_7bit_encode(data, text);
+	gsm_7bit_encode_ussd(data, text, &len);
 	msgb_put(msg, len);
 	ussd_len_ptr[0] = len;
 	/* USSD-String } */
@@ -172,7 +172,7 @@
 	msgb_put_u8(msg, 0x82);
 	tmp_len = msgb_put(msg, 1);
 	data = msgb_put(msg, 0);
-	len = gsm_7bit_encode(data, text);
+	gsm_7bit_encode_ussd(data, text, &len);
 	tmp_len[0] = len;
 	msgb_put(msg, len);
 
@@ -401,8 +401,8 @@
 				/* Prevent a mobile-originated buffer-overrun! */
 				if (num_chars > MAX_LEN_USSD_STRING)
 					num_chars = MAX_LEN_USSD_STRING;
-				gsm_7bit_decode(req->text,
-						&(uss_req_data[7]), num_chars);
+				gsm_7bit_decode_ussd(req->text,
+					&(uss_req_data[7]), num_chars);
 				rc = 1;
 			}
 		}
@@ -423,7 +423,7 @@
 
 	/* First put the payload text into the message */
 	ptr8 = msgb_put(msg, 0);
-	response_len = gsm_7bit_encode(ptr8, text);
+	gsm_7bit_encode_ussd(ptr8, text, &response_len);
 	msgb_put(msg, response_len);
 
 	/* Then wrap it as an Octet String */
diff --git a/src/gsm/gsm_utils.c b/src/gsm/gsm_utils.c
index 54b965e..3dd1537 100644
--- a/src/gsm/gsm_utils.c
+++ b/src/gsm/gsm_utils.c
@@ -172,6 +172,19 @@
 	return gsm_7bit_decode_hdr(text, user_data, septet_l, 0);
 }
 
+int gsm_7bit_decode_ussd(char *text, const uint8_t *user_data, uint8_t length)
+{
+	int i;
+
+	gsm_7bit_decode_hdr(text, user_data, length, 0);
+	i = strlen(text);
+	/* remove last <CR>, if it fits up to the end of last octet */
+	if (i && (user_data[gsm_get_octet_len(length) - 1] >> 1) == '\r')
+		text[--i] = '\0';
+
+	return i;
+}
+
 /* GSM 03.38 6.2.1 Prepare character packing */
 int gsm_septet_encode(uint8_t *result, const char *data)
 {
@@ -254,6 +267,24 @@
 	return gsm_7bit_encode_oct(result, data, &out);
 }
 
+int gsm_7bit_encode_ussd(uint8_t *result, const char *data, int *octets)
+{
+	int y;
+
+	y = gsm_7bit_encode_oct(result, data, octets);
+	/* if last octet contains only one bit, add <CR> */
+	if (((y * 7) & 7) == 1)
+		result[(*octets) - 1] |= ('\r' << 1);
+	/* if last character is <CR> and completely fills last octet, add
+	 * another <CR>. */
+	if (y && ((y * 7) & 7) == 0 && (result[(*octets) - 1] >> 1) == '\r') {
+		result[(*octets)++] = '\r';
+		y++;
+	}
+
+	return y;
+}
+
 int gsm_7bit_encode_oct(uint8_t *result, const char *data, int *octets)
 {
 	int y = 0;
diff --git a/src/gsm/libosmogsm.map b/src/gsm/libosmogsm.map
index 8a020db..1b985e1 100644
--- a/src/gsm/libosmogsm.map
+++ b/src/gsm/libosmogsm.map
@@ -132,8 +132,10 @@
 gsm48_rr_att_tlvdef;
 
 gsm_7bit_decode;
+gsm_7bit_decode_ussd;
 gsm_7bit_decode_hdr;
 gsm_7bit_encode;
+gsm_7bit_encode_ussd;
 gsm_7bit_encode_oct;
 
 gsm_arfcn2band;
diff --git a/tests/ussd/ussd_test.c b/tests/ussd/ussd_test.c
index 55384f1..d41c141 100644
--- a/tests/ussd/ussd_test.c
+++ b/tests/ussd/ussd_test.c
@@ -22,6 +22,7 @@
 #include <osmocom/core/application.h>
 #include <osmocom/core/logging.h>
 #include <osmocom/gsm/gsm0480.h>
+#include <osmocom/gsm/gsm_utils.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -68,6 +69,20 @@
 
 struct log_info info = {};
 
+void gsm_7bit_ussd(char *text)
+{
+	uint8_t coded[256];
+	char decoded[256];
+	int y;
+
+	printf("original = %s\n", osmo_hexdump((uint8_t *)text, strlen(text)));
+	gsm_7bit_encode_ussd(coded, text, &y);
+	printf("encoded = %s\n", osmo_hexdump(coded, y));
+	gsm_7bit_decode_ussd(decoded, coded, y * 8 / 7);
+	y = strlen(decoded);
+	printf("decoded = %s\n\n", osmo_hexdump((uint8_t *)decoded, y));
+}
+
 int main(int argc, char **argv)
 {
 	struct ussd_request req;
@@ -93,5 +108,12 @@
 		printf("Result for %d is %d\n", rc, i);
 	}
 
+	printf("<CR> case test for 7 bit encode\n");
+	gsm_7bit_ussd("01234567");
+	gsm_7bit_ussd("0123456");
+	gsm_7bit_ussd("01234567\r");
+	gsm_7bit_ussd("0123456\r");
+	gsm_7bit_ussd("012345\r");
+
 	return 0;
 }
diff --git a/tests/ussd/ussd_test.ok b/tests/ussd/ussd_test.ok
index 1b6316e..91f2a31 100644
--- a/tests/ussd/ussd_test.ok
+++ b/tests/ussd/ussd_test.ok
@@ -51,3 +51,24 @@
 Result for 0 is 7
 Result for 0 is 6
 Result for 1 is 5
+<CR> case test for 7 bit encode
+original = 30 31 32 33 34 35 36 37 
+encoded = b0 98 6c 46 ab d9 6e 
+decoded = 30 31 32 33 34 35 36 37 
+
+original = 30 31 32 33 34 35 36 
+encoded = b0 98 6c 46 ab d9 1a 
+decoded = 30 31 32 33 34 35 36 
+
+original = 30 31 32 33 34 35 36 37 0d 
+encoded = b0 98 6c 46 ab d9 6e 0d 
+decoded = 30 31 32 33 34 35 36 37 0d 
+
+original = 30 31 32 33 34 35 36 0d 
+encoded = b0 98 6c 46 ab d9 1a 0d 
+decoded = 30 31 32 33 34 35 36 0d 0d 
+
+original = 30 31 32 33 34 35 0d 
+encoded = b0 98 6c 46 ab 35 1a 
+decoded = 30 31 32 33 34 35 0d 
+