CRC4: use proper CRC4 table to avoid bit-reversal of each byte

In commit 9bd2c9ffe7cf82c5d0a12406db018717d9b78858 we fixed the CRC4
computation by bit-reversing every byte before using it in the CRC
table.  This is of course a waste of CPU cycles.  Let's just compute
the CRC4 table slightly differently (thanks to Dietter):

The following commands using pycrc from pycrc.org were used:
./pycrc.py --width=4 --poly=0x3 --reflect-in=false --reflect-out=false --xor-out=0 --xor-in=0 --algorithm table-driven  --generate c -o crc4itu.c
./pycrc.py --width=4 --poly=0x3 --reflect-in=false --reflect-out=false --xor-out=0 --xor-in=0 --algorithm table-driven  --generate h -o crc4itu.h
diff --git a/src/crc4itu.c b/src/crc4itu.c
index b927d4b..f13895a 100644
--- a/src/crc4itu.c
+++ b/src/crc4itu.c
@@ -1,29 +1,56 @@
+/**
+ * \file
+ * Functions and types for CRC checks.
+ *
+ * Generated on Sat May 12 09:39:22 2018
+ * by pycrc v0.9.1, https://pycrc.org
+ * using the configuration:
+ *  - Width         = 4
+ *  - Poly          = 0x3
+ *  - XorIn         = 0x0
+ *  - ReflectIn     = False
+ *  - XorOut        = 0x0
+ *  - ReflectOut    = False
+ *  - Algorithm     = table-driven
+ */
+#include "crc4itu.h"     /* include the header file generated with pycrc */
+#include <stdlib.h>
 #include <stdint.h>
-#include <osmocom/core/bits.h>
 
-static const uint8_t crc4_table_byte[256] = {
-	0x0, 0x7, 0xe, 0x9, 0x5, 0x2, 0xb, 0xc, 0xa, 0xd, 0x4, 0x3, 0xf, 0x8, 0x1, 0x6,
-	0xd, 0xa, 0x3, 0x4, 0x8, 0xf, 0x6, 0x1, 0x7, 0x0, 0x9, 0xe, 0x2, 0x5, 0xc, 0xb,
-	0x3, 0x4, 0xd, 0xa, 0x6, 0x1, 0x8, 0xf, 0x9, 0xe, 0x7, 0x0, 0xc, 0xb, 0x2, 0x5,
-	0xe, 0x9, 0x0, 0x7, 0xb, 0xc, 0x5, 0x2, 0x4, 0x3, 0xa, 0xd, 0x1, 0x6, 0xf, 0x8,
-	0x6, 0x1, 0x8, 0xf, 0x3, 0x4, 0xd, 0xa, 0xc, 0xb, 0x2, 0x5, 0x9, 0xe, 0x7, 0x0,
-	0xb, 0xc, 0x5, 0x2, 0xe, 0x9, 0x0, 0x7, 0x1, 0x6, 0xf, 0x8, 0x4, 0x3, 0xa, 0xd,
-	0x5, 0x2, 0xb, 0xc, 0x0, 0x7, 0xe, 0x9, 0xf, 0x8, 0x1, 0x6, 0xa, 0xd, 0x4, 0x3,
-	0x8, 0xf, 0x6, 0x1, 0xd, 0xa, 0x3, 0x4, 0x2, 0x5, 0xc, 0xb, 0x7, 0x0, 0x9, 0xe,
-	0xc, 0xb, 0x2, 0x5, 0x9, 0xe, 0x7, 0x0, 0x6, 0x1, 0x8, 0xf, 0x3, 0x4, 0xd, 0xa,
-	0x1, 0x6, 0xf, 0x8, 0x4, 0x3, 0xa, 0xd, 0xb, 0xc, 0x5, 0x2, 0xe, 0x9, 0x0, 0x7,
-	0xf, 0x8, 0x1, 0x6, 0xa, 0xd, 0x4, 0x3, 0x5, 0x2, 0xb, 0xc, 0x0, 0x7, 0xe, 0x9,
-	0x2, 0x5, 0xc, 0xb, 0x7, 0x0, 0x9, 0xe, 0x8, 0xf, 0x6, 0x1, 0xd, 0xa, 0x3, 0x4,
-	0xa, 0xd, 0x4, 0x3, 0xf, 0x8, 0x1, 0x6, 0x0, 0x7, 0xe, 0x9, 0x5, 0x2, 0xb, 0xc,
-	0x7, 0x0, 0x9, 0xe, 0x2, 0x5, 0xc, 0xb, 0xd, 0xa, 0x3, 0x4, 0x8, 0xf, 0x6, 0x1,
-	0x9, 0xe, 0x7, 0x0, 0xc, 0xb, 0x2, 0x5, 0x3, 0x4, 0xd, 0xa, 0x6, 0x1, 0x8, 0xf,
-	0x4, 0x3, 0xa, 0xd, 0x1, 0x6, 0xf, 0x8, 0xe, 0x9, 0x0, 0x7, 0xb, 0xc, 0x5, 0x2,
+
+
+/**
+ * Byte-wise lookup table (width 4, poly 0x3, not reflected); indexed by (crc << 4) ^ data byte.
+ */
+static const crc_t crc_table[256] = {
+    0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02,
+    0x05, 0x06, 0x03, 0x00, 0x09, 0x0a, 0x0f, 0x0c, 0x0e, 0x0d, 0x08, 0x0b, 0x02, 0x01, 0x04, 0x07,
+    0x0a, 0x09, 0x0c, 0x0f, 0x06, 0x05, 0x00, 0x03, 0x01, 0x02, 0x07, 0x04, 0x0d, 0x0e, 0x0b, 0x08,
+    0x0f, 0x0c, 0x09, 0x0a, 0x03, 0x00, 0x05, 0x06, 0x04, 0x07, 0x02, 0x01, 0x08, 0x0b, 0x0e, 0x0d,
+    0x07, 0x04, 0x01, 0x02, 0x0b, 0x08, 0x0d, 0x0e, 0x0c, 0x0f, 0x0a, 0x09, 0x00, 0x03, 0x06, 0x05,
+    0x02, 0x01, 0x04, 0x07, 0x0e, 0x0d, 0x08, 0x0b, 0x09, 0x0a, 0x0f, 0x0c, 0x05, 0x06, 0x03, 0x00,
+    0x0d, 0x0e, 0x0b, 0x08, 0x01, 0x02, 0x07, 0x04, 0x06, 0x05, 0x00, 0x03, 0x0a, 0x09, 0x0c, 0x0f,
+    0x08, 0x0b, 0x0e, 0x0d, 0x04, 0x07, 0x02, 0x01, 0x03, 0x00, 0x05, 0x06, 0x0f, 0x0c, 0x09, 0x0a,
+    0x0e, 0x0d, 0x08, 0x0b, 0x02, 0x01, 0x04, 0x07, 0x05, 0x06, 0x03, 0x00, 0x09, 0x0a, 0x0f, 0x0c,
+    0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09,
+    0x04, 0x07, 0x02, 0x01, 0x08, 0x0b, 0x0e, 0x0d, 0x0f, 0x0c, 0x09, 0x0a, 0x03, 0x00, 0x05, 0x06,
+    0x01, 0x02, 0x07, 0x04, 0x0d, 0x0e, 0x0b, 0x08, 0x0a, 0x09, 0x0c, 0x0f, 0x06, 0x05, 0x00, 0x03,
+    0x09, 0x0a, 0x0f, 0x0c, 0x05, 0x06, 0x03, 0x00, 0x02, 0x01, 0x04, 0x07, 0x0e, 0x0d, 0x08, 0x0b,
+    0x0c, 0x0f, 0x0a, 0x09, 0x00, 0x03, 0x06, 0x05, 0x07, 0x04, 0x01, 0x02, 0x0b, 0x08, 0x0d, 0x0e,
+    0x03, 0x00, 0x05, 0x06, 0x0f, 0x0c, 0x09, 0x0a, 0x08, 0x0b, 0x0e, 0x0d, 0x04, 0x07, 0x02, 0x01,
+    0x06, 0x05, 0x00, 0x03, 0x0a, 0x09, 0x0c, 0x0f, 0x0d, 0x0e, 0x0b, 0x08, 0x01, 0x02, 0x07, 0x04
 };
 
-uint8_t crc4itu(uint8_t crc, const uint8_t *data, unsigned int len)
+
+crc_t crc4itu_update(crc_t crc, const void *data, size_t data_len)
 {
-	crc &= 0xf;
-	while (len--)
-		crc = crc4_table_byte[crc ^ osmo_revbytebits_8(*data++)];
-	return crc;
+    const unsigned char *d = (const unsigned char *)data; /* walk the input one byte at a time */
+    unsigned int tbl_idx;
+
+    while (data_len--) {
+        tbl_idx = ((crc & 0xf) << 4) ^ *d; /* mask first: a crc above 0xf must not index past crc_table[255] */
+        crc = crc_table[tbl_idx] & 0xf;
+        d++;
+    }
+    return crc & 0xf;
 }
diff --git a/src/crc4itu.h b/src/crc4itu.h
index ba8e79a..220b50f 100644
--- a/src/crc4itu.h
+++ b/src/crc4itu.h
@@ -1,3 +1,106 @@
-#pragma once
+/**
+ * \file
+ * Functions and types for CRC checks.
+ *
+ * Generated on Sat May 12 09:41:12 2018
+ * by pycrc v0.9.1, https://pycrc.org
+ * using the configuration:
+ *  - Width         = 4
+ *  - Poly          = 0x3
+ *  - XorIn         = 0x0
+ *  - ReflectIn     = False
+ *  - XorOut        = 0x0
+ *  - ReflectOut    = False
+ *  - Algorithm     = table-driven
+ *
+ * This file defines the functions crc4itu_init(), crc4itu_update() and crc_finalize().
+ *
+ * The crc4itu_init() function returns the initial \c crc value and must be called
+ * before the first call to crc4itu_update().
+ * Similarly, the crc_finalize() function must be called after the last
+ * call to crc4itu_update(), before the \c crc value is used by the
+ * application.
+ *
+ * The crc4itu_update() function can be called any number of times (including zero
+ * times) in between the crc4itu_init() and crc_finalize() calls.
+ *
+ * This pseudo-code shows an example usage of the API:
+ * \code{.c}
+ * crc_t crc;
+ * unsigned char data[MAX_DATA_LEN];
+ * size_t data_len;
+ *
+ * crc = crc4itu_init();
+ * while ((data_len = read_data(data, MAX_DATA_LEN)) > 0) {
+ *     crc = crc4itu_update(crc, data, data_len);
+ * }
+ * crc = crc_finalize(crc);
+ * \endcode
+ */
+#ifndef CRC4ITU_H
+#define CRC4ITU_H
 
-uint8_t crc4itu(uint8_t crc, const uint8_t *data, unsigned int len);
+#include <stdlib.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * The definition of the used algorithm.
+ *
+ * This is not used anywhere in the generated code, but it may be used by the
+ * application code to call algorithm-specific code, if desired.
+ */
+#define CRC_ALGO_TABLE_DRIVEN 1
+
+
+/**
+ * The type of the CRC values.
+ *
+ * This type must be big enough to contain at least 4 bits.
+ */
+typedef uint_fast8_t crc_t;
+
+
+/**
+ * Return the initial crc value (0x0, matching XorIn = 0x0 of this configuration).
+ *
+ * \return     The initial crc value.
+ */
+static inline crc_t crc4itu_init(void)
+{
+    return 0x0;
+}
+
+
+/**
+ * Update the crc value with new data.
+ *
+ * \param[in] crc      The current crc value.
+ * \param[in] data     Pointer to a buffer of \a data_len bytes.
+ * \param[in] data_len Number of bytes in the \a data buffer.
+ * \return             The updated crc value.
+ */
+crc_t crc4itu_update(crc_t crc, const void *data, size_t data_len);
+
+
+/**
+ * Finalize the crc value; returns \a crc unchanged (XorOut = 0x0, ReflectOut = False).
+ *
+ * \param[in] crc  The current crc value.
+ * \return     The final crc value.
+ */
+static inline crc_t crc_finalize(crc_t crc)
+{
+    return crc;
+}
+
+
+#ifdef __cplusplus
+}           /* closing brace for extern "C" */
+#endif
+
+#endif      /* CRC4ITU_H */
diff --git a/src/osmo_e1.c b/src/osmo_e1.c
index a328eb1..2b83622 100644
--- a/src/osmo_e1.c
+++ b/src/osmo_e1.c
@@ -109,7 +109,7 @@
 	e1i->tx.crc4_error = false;
 	e1i->tx.frame_nr = 0;
 	e1i->tx.crc4_last_smf = 0;
-	e1i->tx.crc4 = 0;
+	e1i->tx.crc4 = crc4itu_init();
 
 	e1i->rx.frame_nr = 0;
 	memset(&e1i->rx.ts0_history, 0, sizeof(e1i->rx.ts0_history));
@@ -279,9 +279,9 @@
 	/* mask off the C bits */
 	if (is_correct_fas(ts0))
 		ts0 &= 0x7F;
-	e1i->tx.crc4 = crc4itu(e1i->tx.crc4, &ts0, 1);
+	e1i->tx.crc4 = crc4itu_update(e1i->tx.crc4, &ts0, 1);
 	/* add the remaining bytes/bits */
-	e1i->tx.crc4 = crc4itu(e1i->tx.crc4, out_frame+1, ARRAY_SIZE(e1i->ts)-1);
+	e1i->tx.crc4 = crc4itu_update(e1i->tx.crc4, out_frame+1, ARRAY_SIZE(e1i->ts)-1);
 }
 
 /*! Pull one to-be-transmitted E1 frame (256bits) from the E1 instance
@@ -395,10 +395,10 @@
 	if (smf2)
 		offset = 8;
 
-	crc |= (e1i->rx.ts0_history[0+offset] >> 7) << 0;
-	crc |= (e1i->rx.ts0_history[2+offset] >> 7) << 1;
-	crc |= (e1i->rx.ts0_history[4+offset] >> 7) << 2;
-	crc |= (e1i->rx.ts0_history[6+offset] >> 7) << 3;
+	crc |= (e1i->rx.ts0_history[0+offset] >> 7) << 3;
+	crc |= (e1i->rx.ts0_history[2+offset] >> 7) << 2;
+	crc |= (e1i->rx.ts0_history[4+offset] >> 7) << 1;
+	crc |= (e1i->rx.ts0_history[6+offset] >> 7) << 0;
 
 	return crc;
 }
@@ -412,9 +412,9 @@
 	/* mask off the C bits */
 	if (is_correct_fas(ts0))
 		ts0 &= 0x7F;
-	e1i->rx.crc4 = crc4itu(e1i->rx.crc4, &ts0, 1);
+	e1i->rx.crc4 = crc4itu_update(e1i->rx.crc4, &ts0, 1);
 	/* add the remaining bytes/bits */
-	e1i->rx.crc4 = crc4itu(e1i->rx.crc4, rx_frame+1, ARRAY_SIZE(e1i->ts)-1);
+	e1i->rx.crc4 = crc4itu_update(e1i->rx.crc4, rx_frame+1, ARRAY_SIZE(e1i->ts)-1);
 }
 
 /* FSM State handler */
@@ -499,7 +499,7 @@
 			}
 			/* rotate computed CRC4 one further */
 			e1i->rx.crc4_last_smf = e1i->rx.crc4;
-			e1i->rx.crc4 = 0;
+			e1i->rx.crc4 = crc4itu_init();
 			break;
 		default:
 			break;