nat: Introduce a prefix lookup tree (trie) for number rewriting

* It is a trie. The max depth of the trie is the length of the
longest prefix. The lookup is O(lookuped_prefix), but as the prefix
length is limited, the lookup time is constant.

* Each node can hold the entire prefix, has place for the rewrite
  rule with up to three digits.

* A trie with 20k entries will take about 3MB ram.

* Filling the trie 100 times takes ~800ms on my i7 laptop

* 10.000.000 lookups take 315ms.. (for the same prefix).

* 93/99 lines are tested, 6/6 functions are tested, 49 of 54 branches
  are tested. Only memory allocation failures are not covered

* A late addition is to handle the '+' sign and to increase the number
  of chars in the rewrite prefix. The timing/line coverage has not
  been updated after this change.
diff --git a/openbsc/.gitignore b/openbsc/.gitignore
index 3743f69..79222f7 100644
--- a/openbsc/.gitignore
+++ b/openbsc/.gitignore
@@ -50,6 +50,7 @@
 
 #tests
 tests/bsc-nat/bsc_nat_test
+tests/bsc-nat-trie/bsc_nat_trie_test
 tests/channel/channel_test
 tests/db/db_test
 tests/debug/debug_test
diff --git a/openbsc/configure.ac b/openbsc/configure.ac
index 0227d26..ce2a328 100644
--- a/openbsc/configure.ac
+++ b/openbsc/configure.ac
@@ -157,6 +157,7 @@
     tests/db/Makefile
     tests/channel/Makefile
     tests/bsc-nat/Makefile
+    tests/bsc-nat-trie/Makefile
     tests/mgcp/Makefile
     tests/gprs/Makefile
     tests/si/Makefile
diff --git a/openbsc/include/openbsc/Makefile.am b/openbsc/include/openbsc/Makefile.am
index 93b30bb..6ba6d1b 100644
--- a/openbsc/include/openbsc/Makefile.am
+++ b/openbsc/include/openbsc/Makefile.am
@@ -13,7 +13,7 @@
 		osmo_bsc_rf.h osmo_bsc.h network_listen.h bsc_nat_sccp.h \
 		osmo_msc_data.h osmo_bsc_grace.h sms_queue.h abis_om2000.h \
 		bss.h gsm_data_shared.h control_cmd.h ipaccess.h mncc_int.h \
-		arfcn_range_encode.h
+		arfcn_range_encode.h nat_rewrite_trie.h
 
 openbsc_HEADERS = gsm_04_08.h meas_rep.h bsc_api.h
 openbscdir = $(includedir)/openbsc
diff --git a/openbsc/include/openbsc/nat_rewrite_trie.h b/openbsc/include/openbsc/nat_rewrite_trie.h
new file mode 100644
index 0000000..4445c3e
--- /dev/null
+++ b/openbsc/include/openbsc/nat_rewrite_trie.h
@@ -0,0 +1,44 @@
+/*
+ * (C) 2013 by On-Waves
+ * (C) 2013 by Holger Hans Peter Freyther <zecke@selfish.org>
+ * All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#ifndef NAT_REWRITE_FILE_H
+#define NAT_REWRITE_FILE_H
+
+#include <osmocom/core/linuxrbtree.h>
+
+struct nat_rewrite_rule {
+	/* For digits 0-9 and + */
+	struct nat_rewrite_rule *rules[11];
+
+	char empty;
+	char prefix[14];
+	char rewrite[4];
+};
+
+struct nat_rewrite {
+	struct nat_rewrite_rule rule;
+	size_t prefixes;
+};
+
+
+struct nat_rewrite *nat_rewrite_parse(void *ctx, const char *filename);
+struct nat_rewrite_rule *nat_rewrite_lookup(struct nat_rewrite *, const char *prefix);
+void nat_rewrite_dump(struct nat_rewrite *rewr);
+
+#endif
diff --git a/openbsc/src/osmo-bsc_nat/Makefile.am b/openbsc/src/osmo-bsc_nat/Makefile.am
index e2ba551..aae6d69 100644
--- a/openbsc/src/osmo-bsc_nat/Makefile.am
+++ b/openbsc/src/osmo-bsc_nat/Makefile.am
@@ -7,7 +7,7 @@
 
 osmo_bsc_nat_SOURCES = bsc_filter.c bsc_mgcp_utils.c bsc_nat.c bsc_nat_utils.c \
 		  bsc_nat_vty.c bsc_sccp.c bsc_ussd.c bsc_nat_ctrl.c \
-		  bsc_nat_rewrite.c bsc_nat_filter.c
+		  bsc_nat_rewrite.c bsc_nat_filter.c bsc_nat_rewrite_trie.c
 osmo_bsc_nat_LDADD = $(top_builddir)/src/libcommon/libcommon.a \
 		$(top_builddir)/src/libmgcp/libmgcp.a \
 		$(top_builddir)/src/libbsc/libbsc.a \
diff --git a/openbsc/src/osmo-bsc_nat/bsc_nat.c b/openbsc/src/osmo-bsc_nat/bsc_nat.c
index 1237339..897c657 100644
--- a/openbsc/src/osmo-bsc_nat/bsc_nat.c
+++ b/openbsc/src/osmo-bsc_nat/bsc_nat.c
@@ -1480,6 +1480,7 @@
 	.is_config_node	= bsc_vty_is_config_node,
 };
 
+
 int main(int argc, char **argv)
 {
 	int rc;
@@ -1526,6 +1527,8 @@
 	/* seed the PRNG */
 	srand(time(NULL));
 
+
+
 	/*
 	 * Setup the MGCP code..
 	 */
diff --git a/openbsc/src/osmo-bsc_nat/bsc_nat_rewrite_trie.c b/openbsc/src/osmo-bsc_nat/bsc_nat_rewrite_trie.c
new file mode 100644
index 0000000..57043ac
--- /dev/null
+++ b/openbsc/src/osmo-bsc_nat/bsc_nat_rewrite_trie.c
@@ -0,0 +1,240 @@
+/* Handling for loading a re-write file/database */
+/*
+ * (C) 2013 by On-Waves
+ * (C) 2013 by Holger Hans Peter Freyther <zecke@selfish.org>
+ * All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <openbsc/nat_rewrite_trie.h>
+#include <openbsc/debug.h>
+
+#include <osmocom/core/talloc.h>
+#include <osmocom/core/utils.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#define CHECK_IS_DIGIT_OR_FAIL(prefix, pos)						\
+	if (!isdigit(prefix[pos]) && prefix[pos] != '+') {				\
+			LOGP(DNAT, LOGL_ERROR,						\
+				"Prefix(%s) contains non ascii text at(%d=%c)\n",	\
+				prefix, pos, prefix[pos]);				\
+			goto fail;							\
+	}
+#define TO_INT(c) \
+	((c) == '+' ? 10 : ((c - '0') % 10))
+
+static void insert_rewrite_node(struct nat_rewrite_rule *rule, struct nat_rewrite *root)
+{
+	struct nat_rewrite_rule *new = &root->rule;
+
+	const size_t len = strlen(rule->prefix);
+	int i;
+
+	if (len == 0) {
+		LOGP(DNAT, LOGL_ERROR, "An empty prefix does not make sense.\n");
+		goto fail;
+	}
+
+	for (i = 0; i < len - 1; ++i) {
+		int pos;
+
+		/* check if the input is valid */
+		CHECK_IS_DIGIT_OR_FAIL(rule->prefix, i);
+
+		/* check if the next node is already valid */
+		pos = TO_INT(rule->prefix[i]);
+		if (!new->rules[pos]) {
+			new->rules[pos] = talloc_zero(root, struct nat_rewrite_rule);
+			if (!new->rules[pos]) {
+				LOGP(DNAT, LOGL_ERROR,
+					"Failed to allocate memory.\n");
+				goto fail;
+			}
+
+			new->rules[pos]->empty = 1;
+		}
+
+		/* we continue here */
+		new = new->rules[pos];
+	}
+
+	/* new now points to the place where we want to add it */
+	int pos;
+
+	/* check if the input is valid */
+	CHECK_IS_DIGIT_OR_FAIL(rule->prefix, (len - 1));
+
+	/* check if the next node is already valid */
+	pos = TO_INT(rule->prefix[len - 1]);
+	if (!new->rules[pos])
+		new->rules[pos] = rule;
+	else if (new->rules[pos]->empty) {
+		/* copy over entries */
+		new->rules[pos]->empty = 0;
+		memcpy(new->rules[pos]->prefix, rule->prefix, sizeof(rule->prefix));
+		memcpy(new->rules[pos]->rewrite, rule->rewrite, sizeof(rule->rewrite));
+		talloc_free(rule);
+	} else {
+		LOGP(DNAT, LOGL_ERROR,
+			"Prefix(%s) is already installed\n", rule->prefix);
+		goto fail;
+	}
+
+	root->prefixes += 1;
+	return;
+
+fail:
+	talloc_free(rule);
+	return;
+}
+
+static void handle_line(struct nat_rewrite *rewrite, char *line)
+{
+	char *split;
+	struct nat_rewrite_rule *rule;
+	size_t size_prefix, size_end, len;
+
+
+	/* Find the ',' in the line */
+	len = strlen(line);
+	split = strstr(line, ",");
+	if (!split) {
+		LOGP(DNAT, LOGL_ERROR, "Line doesn't contain ','\n");
+		return;
+	}
+
+	/* Check if there is space for the rewrite rule */
+	size_prefix = split - line;
+	if (len - size_prefix <= 2) {
+		LOGP(DNAT, LOGL_ERROR, "No rewrite available.\n");
+		return;
+	}
+
+	/* Continue after the ',' to the end */
+	split = &line[size_prefix + 1];
+	size_end = strlen(split) - 1;
+
+	/* Check if both strings can fit into the static array */
+	if (size_prefix > sizeof(rule->prefix) - 1) {
+		LOGP(DNAT, LOGL_ERROR,
+			"Prefix is too long with %zu\n", size_prefix);
+		return;
+	}
+
+	if (size_end > sizeof(rule->rewrite) - 1) {
+		LOGP(DNAT, LOGL_ERROR,
+			"Rewrite is too long with %zu on %s\n",
+			size_end, &line[size_prefix + 1]);
+		return;
+	}
+
+	/* Now create the entry and insert it into the trie */
+	rule = talloc_zero(rewrite, struct nat_rewrite_rule);
+	if (!rule) {
+		LOGP(DNAT, LOGL_ERROR, "Can not allocate memory\n");
+		return;
+	}
+
+	memcpy(rule->prefix, line, size_prefix);
+	assert(size_prefix < sizeof(rule->prefix));
+	rule->prefix[size_prefix] = '\0';
+
+	memcpy(rule->rewrite, split, size_end);
+	assert(size_end < sizeof(rule->rewrite));
+	rule->rewrite[size_end] = '\0';
+
+	/* now insert and balance the tree */
+	insert_rewrite_node(rule, rewrite);
+}
+
+struct nat_rewrite *nat_rewrite_parse(void *ctx, const char *filename)
+{
+	FILE *file;
+	char *line = NULL;
+	size_t n = 2342;
+	struct nat_rewrite *res;
+
+	file = fopen(filename, "r");
+	if (!file)
+		return NULL;
+
+	res = talloc_zero(ctx, struct nat_rewrite);
+	if (!res) {
+		fclose(file);
+		return NULL;
+	}
+
+	/* mark the root as empty */
+	res->rule.empty = 1;
+
+	while (getline(&line, &n, file) != -1) {
+		handle_line(res, line);
+	}
+
+	free(line);
+	fclose(file);
+	return res;
+}
+
+/**
+ * Simple find that tries to do a longest match...
+ */
+struct nat_rewrite_rule *nat_rewrite_lookup(struct nat_rewrite *rewrite,
+					const char *prefix)
+{
+	struct nat_rewrite_rule *rule = &rewrite->rule;
+	struct nat_rewrite_rule *last = NULL;
+	const int len = OSMO_MIN(strlen(prefix), (sizeof(rule->prefix) - 1));
+	int i;
+
+	for (i = 0; rule && i < len; ++i) {
+		int pos;
+
+		CHECK_IS_DIGIT_OR_FAIL(prefix, i);
+		pos = TO_INT(prefix[i]);
+
+		rule = rule->rules[pos];
+		if (rule && !rule->empty)
+			last = rule;
+	}
+
+	return last;
+
+fail:
+	return NULL;
+}
+
+static void nat_rewrite_dump_rec(struct nat_rewrite_rule *rule)
+{
+	int i;
+	if (!rule->empty)
+		printf("%s,%s\n", rule->prefix, rule->rewrite);
+
+	for (i = 0; i < ARRAY_SIZE(rule->rules); ++i) {
+		if (!rule->rules[i])
+			continue;
+		nat_rewrite_dump_rec(rule->rules[i]);
+	}
+}
+
+void nat_rewrite_dump(struct nat_rewrite *rewrite)
+{
+	nat_rewrite_dump_rec(&rewrite->rule);
+}
diff --git a/openbsc/tests/Makefile.am b/openbsc/tests/Makefile.am
index c9afeed..0597c14 100644
--- a/openbsc/tests/Makefile.am
+++ b/openbsc/tests/Makefile.am
@@ -1,7 +1,7 @@
 SUBDIRS = gsm0408 db channel mgcp gprs si abis
 
 if BUILD_NAT
-SUBDIRS += bsc-nat
+SUBDIRS += bsc-nat bsc-nat-trie
 endif
 
 if BUILD_SMPP
diff --git a/openbsc/tests/bsc-nat-trie/Makefile.am b/openbsc/tests/bsc-nat-trie/Makefile.am
new file mode 100644
index 0000000..355ed64
--- /dev/null
+++ b/openbsc/tests/bsc-nat-trie/Makefile.am
@@ -0,0 +1,18 @@
+AM_CPPFLAGS = $(all_includes) -I$(top_srcdir)/include
+AM_CFLAGS=-Wall -ggdb3 $(LIBOSMOCORE_CFLAGS) $(LIBOSMOGSM_CFLAGS) $(LIBOSMOSCCP_CFLAGS) $(LIBOSMOABIS_CFLAGS) $(COVERAGE_CFLAGS)
+AM_LDFLAGS = $(COVERAGE_LDFLAGS)
+
+EXTRA_DIST = bsc_nat_trie_test.ok prefixes.csv
+
+noinst_PROGRAMS = bsc_nat_trie_test
+
+bsc_nat_trie_test_SOURCES = bsc_nat_trie_test.c \
+			$(top_srcdir)/src/osmo-bsc_nat/bsc_nat_rewrite_trie.c
+bsc_nat_trie_test_LDADD = $(top_builddir)/src/libbsc/libbsc.a \
+			$(top_srcdir)/src/libctrl/libctrl.a \
+			$(top_srcdir)/src/libmgcp/libmgcp.a \
+			$(top_srcdir)/src/libtrau/libtrau.a \
+			$(top_srcdir)/src/libcommon/libcommon.a \
+			$(LIBOSMOCORE_LIBS) $(LIBOSMOGSM_LIBS) -lrt \
+			$(LIBOSMOSCCP_LIBS) $(LIBOSMOVTY_LIBS) \
+			$(LIBOSMOABIS_LIBS)
diff --git a/openbsc/tests/bsc-nat-trie/bsc_nat_trie_test.c b/openbsc/tests/bsc-nat-trie/bsc_nat_trie_test.c
new file mode 100644
index 0000000..4b4df2f
--- /dev/null
+++ b/openbsc/tests/bsc-nat-trie/bsc_nat_trie_test.c
@@ -0,0 +1,87 @@
+/*
+ * (C) 2013 by On-Waves
+ * (C) 2013 by Holger Hans Peter Freyther <zecke@selfish.org>
+ * All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <openbsc/nat_rewrite_trie.h>
+#include <openbsc/debug.h>
+
+#include <osmocom/core/application.h>
+#include <osmocom/core/backtrace.h>
+#include <osmocom/core/talloc.h>
+#include <osmocom/core/utils.h>
+
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+	struct nat_rewrite *trie;
+
+	osmo_init_logging(&log_info);
+
+	printf("Testing the trie\n");
+
+	trie = nat_rewrite_parse(NULL, "prefixes.csv");
+	OSMO_ASSERT(trie);
+
+	/* verify that it has been parsed */
+	OSMO_ASSERT(trie->prefixes == 17);
+	printf("Dumping the internal trie\n");
+	nat_rewrite_dump(trie);
+
+	/* now do the matching... */
+	OSMO_ASSERT(!nat_rewrite_lookup(trie, ""));
+	OSMO_ASSERT(!nat_rewrite_lookup(trie, "2"));
+
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "1")->rewrite, "1") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "12")->rewrite, "2") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "123")->rewrite, "3") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "1234")->rewrite, "4") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "12345")->rewrite, "5") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "123456")->rewrite, "6") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "1234567")->rewrite, "7") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "12345678")->rewrite, "8") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "123456789")->rewrite, "9") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "1234567890")->rewrite, "10") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "13")->rewrite, "11") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "14")->rewrite, "12") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "15")->rewrite, "13") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "16")->rewrite, "14") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "823455")->rewrite, "15") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "82")->rewrite, "16") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "+49123445")->rewrite, "17") == 0);
+
+	/* match a prefix */
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "121")->rewrite, "2") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "1292323")->rewrite, "2") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "12345678901")->rewrite, "10") == 0);
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "160")->rewrite, "14") == 0);
+
+	OSMO_ASSERT(strcmp(nat_rewrite_lookup(trie, "12345678901123452123123")->rewrite, "10") == 0);
+
+	/* invalid input */
+	OSMO_ASSERT(!nat_rewrite_lookup(trie, "12abc"));
+
+	talloc_free(trie);
+
+	trie = nat_rewrite_parse(NULL, "does_not_exist.csv");
+	OSMO_ASSERT(!trie);
+
+	printf("Done with the tests.\n");
+	return 0;
+}
diff --git a/openbsc/tests/bsc-nat-trie/bsc_nat_trie_test.ok b/openbsc/tests/bsc-nat-trie/bsc_nat_trie_test.ok
new file mode 100644
index 0000000..4d4cc99
--- /dev/null
+++ b/openbsc/tests/bsc-nat-trie/bsc_nat_trie_test.ok
@@ -0,0 +1,20 @@
+Testing the trie
+Dumping the internal trie
+1,1
+12,2
+123,3
+1234,4
+12345,5
+123456,6
+1234567,7
+12345678,8
+123456789,9
+1234567890,10
+13,11
+14,12
+15,13
+16,14
+82,16
+823455,15
++49123,17
+Done with the tests.
diff --git a/openbsc/tests/bsc-nat-trie/prefixes.csv b/openbsc/tests/bsc-nat-trie/prefixes.csv
new file mode 100644
index 0000000..35485b1
--- /dev/null
+++ b/openbsc/tests/bsc-nat-trie/prefixes.csv
@@ -0,0 +1,25 @@
+1,1
+12,2
+123,3
+1234,4
+12345,5
+123456,6
+1234567,7
+12345678,8
+123456789,9
+1234567890,10
+13,11
+14,12
+15,13
+16,14
+823455,15
+82,16
++49123,17
+1ABC,18
+12345678901234567890,19
+,20
+14A,21
+124,324324324234
+1234567890,10
+no line
+99,
diff --git a/openbsc/tests/testsuite.at b/openbsc/tests/testsuite.at
index ea3fa1c..148b3d3 100644
--- a/openbsc/tests/testsuite.at
+++ b/openbsc/tests/testsuite.at
@@ -48,6 +48,14 @@
 AT_CHECK([$abs_top_builddir/tests/smpp/smpp_test], [], [expout], [experr])
 AT_CLEANUP
 
+AT_SETUP([bsc-nat-trie])
+AT_KEYWORDS([bsc-nat-trie])
+AT_CHECK([test "$enable_nat_test" != no || exit 77])
+cp $abs_srcdir/bsc-nat-trie/prefixes.csv .
+cat $abs_srcdir/bsc-nat-trie/bsc_nat_trie_test.ok > expout
+AT_CHECK([$abs_top_builddir/tests/bsc-nat-trie/bsc_nat_trie_test], [], [expout], [ignore])
+AT_CLEANUP
+
 AT_SETUP([si])
 AT_KEYWORDS([si])
 cat $abs_srcdir/si/si_test.ok > expout