Add cycle benchmarking support

This enables benchmarking of the codecs.  Before exiting, gapk prints
the average number of CPU cycles needed for encoding/decoding a single
20ms frame.
diff --git a/contrib/benchmark.sh b/contrib/benchmark.sh
new file mode 100755
index 0000000..0148667
--- /dev/null
+++ b/contrib/benchmark.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+GAPK=./src/gapk
+PCMFILE=$1
+BASE=`basename $PCMFILE`
+
+echo
+echo Encoder Benchmark:
+$GAPK -i "$PCMFILE" -f rawpcm-s16le -o "$BASE.fr" -g gsm
+$GAPK -i "$PCMFILE" -f rawpcm-s16le -o "$BASE.efr" -g amr-efr
+$GAPK -i "$PCMFILE" -f rawpcm-s16le -o "$BASE.hr" -g ti-hr
+
+echo
+echo Decoder Benchmark:
+$GAPK -i "$BASE.fr"  -g rawpcm-s16le -o /dev/null -f gsm
+$GAPK -i "$BASE.efr" -g rawpcm-s16le -o /dev/null -f amr-efr
+$GAPK -i "$BASE.hr"  -g rawpcm-s16le -o /dev/null -f ti-hr
diff --git a/contrib/gen_testdata.sh b/contrib/gen_testdata.sh
new file mode 100755
index 0000000..a8bb8c9
--- /dev/null
+++ b/contrib/gen_testdata.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+# script to generate a .s16 file (s16le with 8kHz) from an arbitrary MP3
+# usage: gen_testdata.sh foo.mp3 foo.s16
+
+WAV=`mktemp`
+MP3=$1
+S16=$2	# output file is the second argument; $1 would make sox write to the MP3 path
+
+mpg123 -w "$WAV" "$MP3"
+sox "$WAV" -b 16 -c 1 -r 8000 "$S16"
+rm "$WAV"
diff --git a/include/gapk/benchmark.h b/include/gapk/benchmark.h
new file mode 100644
index 0000000..49c2c36
--- /dev/null
+++ b/include/gapk/benchmark.h
@@ -0,0 +1,60 @@
+#ifndef _BENCHMARK_H
+#define _BENCHMARK_H
+
+/*
+ * This file is part of gapk (GSM Audio Pocket Knife).
+ *
+ * gapk is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * gapk is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with gapk.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * (C) 2014 Harald Welte <laforge@gnumonks.org>
+ */
+
+#include <gapk/get_cycles.h>
+#include <gapk/codecs.h>
+
+#define NUM_AVG	102400
+
+struct benchmark_cycles {
+	cycles_t enc[NUM_AVG];	/* cycle counts recorded by benchmark_stop() for encode calls */
+	unsigned int enc_used;	/* cursor into enc[], wraps at NUM_AVG; benchmark_dump() uses it as the sample count */
+	cycles_t dec[NUM_AVG];	/* cycle counts recorded by benchmark_stop() for decode calls */
+	unsigned int dec_used;	/* same role as enc_used, for dec[] */
+};
+
+extern struct benchmark_cycles codec_cycles[_CODEC_MAX];
+
+static inline void benchmark_stop(enum codec_type codec, int encode, unsigned long cycles)
+{
+	struct benchmark_cycles *bc = &codec_cycles[codec];
+	/* Record one sample (encode != 0: enc[], else dec[]). Store first, then advance, so slot 0 is used and *_used equals the number of filled slots. */
+	if (encode) {
+		bc->enc[bc->enc_used] = cycles;
+		bc->enc_used = (bc->enc_used + 1) % NUM_AVG;
+	} else {
+		bc->dec[bc->dec_used] = cycles;
+		bc->dec_used = (bc->dec_used + 1) % NUM_AVG;
+	}
+}
+
+#define BENCHMARK_START		do {						\
+					cycles_t _cycles_start, _cycles_stop;	\
+					_cycles_start = get_cycles()
+/* START opens a "do {" that only STOP closes: always use both, in the same scope. x = codec, y = nonzero for encode. */
+#define BENCHMARK_STOP(x,y)		_cycles_stop = get_cycles();			    \
+					benchmark_stop(x, y, _cycles_stop - _cycles_start); \
+				} while (0)
+
+void benchmark_dump(void);
+
+#endif
diff --git a/include/gapk/get_cycles.h b/include/gapk/get_cycles.h
new file mode 100644
index 0000000..ec530fc
--- /dev/null
+++ b/include/gapk/get_cycles.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2005 Mellanox Technologies Ltd.,
+ *	     (c) 2005 Harald Welte <laforge@gnumonks.org>, All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#ifndef GET_CLOCK_H
+#define GET_CLOCK_H
+
+#if 0
+
+#define _POSIX_C_SOURCE 199506L
+#include <unistd.h>
+#include <time.h>
+
+/* Ideally we would be using clock_getres() and clock_gettime(). 
+ * glibc manpage says CLOCK_PROCESS_CPUTIME_ID is only defined if it is
+ * actually present.  however, on ppc64 it is defined but not implemented. */
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+typedef long cycles_t;
+static inline cycles_t get_cycles()
+{
+	struct timespec ts;
+
+#if defined (__x86_64__) || defined(__i386__)
+	asm volatile ("cpuid"); /* flush pipeline */
+#endif
+	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
+	return ts.tv_nsec;
+}
+#endif
+
+#endif
+
+#if defined (__x86_64__) || defined(__i386__)
+/* Note: only x86 CPUs which have rdtsc instruction are supported. */
+typedef unsigned long long cycles_t;
+static inline cycles_t get_cycles()
+{
+	unsigned low = 0, high;
+
+	/* cpuid (leaf 0) flushes the pipeline, but it reads eax and overwrites
+	 * eax/ebx/ecx/edx: declare that so no live value is kept in them. */
+	asm volatile ("cpuid" : "+a" (low), "=d" (high) : : "ebx", "ecx");
+	asm volatile ("rdtsc" : "=a" (low), "=d" (high));
+	return ((cycles_t)high << 32) | low;	/* edx:eax -> 64-bit counter */
+}
+#elif defined(__PPC64__)
+/* Note: only PPC CPUs which have mftb instruction are supported. */
+typedef unsigned long long cycles_t;
+static inline cycles_t get_cycles()
+{
+	cycles_t ret;
+	/* a single mftb; "=r" lets the compiler choose the destination register */
+	asm volatile ("mftb %0" : "=r" (ret) : );
+	return ret;
+}
+#elif defined(__sparc__)
+/* Note: only sparc64 supports this register */
+typedef unsigned long long cycles_t;
+#define TICK_PRIV_BIT  (1ULL << 63)
+static inline cycles_t get_cycles()
+{
+	cycles_t ret;
+	/* 64-bit builds read %tick straight into ret; otherwise it goes via %g1 and is split into the high/low halves of ret */
+#if defined(__sparcv9) || defined(__arch64__)
+	asm volatile ("rd %%tick, %0" : "=r" (ret));
+#else
+	asm volatile ("rd %%tick, %%g1\n\t"
+		      "srlx %%g1, 32, %H0\n\t"
+		      "srl  %%g1,  0, %L0"
+		      : "=r" (ret)
+		      : /* no inputs */
+		      : "g1");
+#endif
+	return ret & ~TICK_PRIV_BIT;	/* clear bit 63 (TICK_PRIV_BIT) of the raw value */
+}
+#elif defined(__PPC__)
+#define CPU_FTR_601                  0x00000100
+typedef unsigned long cycles_t;
+static inline cycles_t get_cycles()
+{
+	cycles_t ret;
+	/* NOTE(review): besides the mftb, this emits a __ftr_fixup record keyed on CPU_FTR_601 -- presumably inherited from kernel code; confirm it has any effect in userspace */
+	asm volatile (
+		"98:	mftb %0\n"
+		"99:\n"
+		".section __ftr_fixup,\"a\"\n"
+		"	.long %1\n"
+		"	.long 0\n"
+		"	.long 98b\n"
+		"	.long 99b\n"
+		".previous"
+		: "=r" (ret) : "i" (CPU_FTR_601));
+	return ret;
+}
+#elif defined(__ia64__) || defined(__mips__) || \
+      defined(__s390__)
+/* Itanium2 and up has ar.itc (Itanium1 has errata) */
+/* PPC64 has mftb */
+#include <asm/timex.h>
+#else
+#warning get_cycles not implemented for this architecture: attempt asm/timex.h
+#include <asm/timex.h>
+#endif
+
+extern double get_cpu_mhz(void);
+
+#endif /* GET_CLOCK_H */
diff --git a/src/Makefile.am b/src/Makefile.am
index 818f215..788d425 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -6,7 +6,7 @@
 
 COM_SOURCES = procqueue.c pq_file.c pq_format.c pq_codec.c pq_rtp.c \
 		formats.c fmt_amr.c fmt_gsm.c fmt_hr_ref.c fmt_racal.c \
-			fmt_rawpcm.c fmt_ti.c \
+			fmt_rawpcm.c fmt_ti.c benchmark.c \
 		codecs.c codec_pcm.c codec_hr.c codec_fr.c codec_efr.c
 
 bin_PROGRAMS = gapk
diff --git a/src/benchmark.c b/src/benchmark.c
new file mode 100644
index 0000000..e2c14a6
--- /dev/null
+++ b/src/benchmark.c
@@ -0,0 +1,55 @@
+/*
+ * This file is part of gapk (GSM Audio Pocket Knife).
+ *
+ * gapk is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * gapk is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with gapk.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * (C) 2014 Harald Welte <laforge@gnumonks.org>
+ */
+
+#include <stdio.h>
+
+#include <gapk/benchmark.h>
+
+struct benchmark_cycles codec_cycles[_CODEC_MAX];
+
+void benchmark_dump(void)
+{
+	unsigned int i;	/* unsigned: printed with %u below */
+	/* For every codec with recorded samples, print total and per-frame average cycles. */
+	for (i = 0; i < _CODEC_MAX; i++) {
+		struct benchmark_cycles *bc = &codec_cycles[i];
+		unsigned long long total;
+		unsigned int j;	/* same type as the *_used counters it is compared with */
+
+		if (bc->enc_used) {
+			total = 0;
+			for (j = 0; j < bc->enc_used; j++)
+				total += bc->enc[j];
+
+			printf("Codec %u (ENC): %llu cycles for %u frames => "
+				"%llu cycles/frame\n", i, total, bc->enc_used,
+				total / bc->enc_used);
+		}
+
+		if (bc->dec_used) {
+			total = 0;
+			for (j = 0; j < bc->dec_used; j++)
+				total += bc->dec[j];
+
+			printf("Codec %u (DEC): %llu cycles for %u frames => "
+				"%llu cycles/frame\n", i, total, bc->dec_used,
+				total / bc->dec_used);
+		}
+	}
+}
diff --git a/src/codec_efr.c b/src/codec_efr.c
index 141dec1..55bbd11 100644
--- a/src/codec_efr.c
+++ b/src/codec_efr.c
@@ -18,6 +18,7 @@
  */
 
 #include <gapk/codecs.h>
+#include <gapk/benchmark.h>
 
 #include "config.h"
 
@@ -68,6 +69,7 @@
 	struct codec_efr_state *st = state;
 	int rv;
 
+	BENCHMARK_START;
 	rv = Encoder_Interface_Encode(
 		st->encoder,
 		MR122,
@@ -75,6 +77,7 @@
 		(unsigned char*) cod,
 		1
 	);
+	BENCHMARK_STOP(CODEC_EFR, 1);
 
 	return rv != 32;
 }
@@ -84,12 +87,14 @@
 {
 	struct codec_efr_state *st = state;
 
+	BENCHMARK_START;
 	Decoder_Interface_Decode(
 		st->decoder,
 		(const unsigned char*) cod,
 		(short *) pcm,
 		0
 	);
+	BENCHMARK_STOP(CODEC_EFR, 0);
 
 	return 0;
 }
diff --git a/src/codec_fr.c b/src/codec_fr.c
index 3b1bb7c..1bda210 100644
--- a/src/codec_fr.c
+++ b/src/codec_fr.c
@@ -18,6 +18,7 @@
  */
 
 #include <gapk/codecs.h>
+#include <gapk/benchmark.h>
 
 #include "config.h"
 
@@ -54,7 +55,9 @@
 	gsm gh = (gsm)state;
 	uint8_t pcm_b[2*160];	/* local copy as libgsm src isn't const ! */
 	memcpy(pcm_b, pcm, 2*160);
+	BENCHMARK_START;
 	gsm_encode(gh, (gsm_signal*)pcm, (gsm_byte*)cod);
+	BENCHMARK_STOP(CODEC_FR, 1);
 	return 0;
 }
 
@@ -63,8 +66,12 @@
 {
 	gsm gh = (gsm)state;
 	uint8_t cod_b[33];	/* local copy as libgsm src isn't const ! */
+	int rc;
 	memcpy(cod_b, cod, 33);
-	return gsm_decode(gh, (gsm_byte*)cod_b, (gsm_signal*)pcm);
+	BENCHMARK_START;
+	rc = gsm_decode(gh, (gsm_byte*)cod_b, (gsm_signal*)pcm);
+	BENCHMARK_STOP(CODEC_FR, 0);	/* 0 = decode; 1 would file this under the encoder samples */
+	return rc;
 }
 
 #endif /* HAVE_LIBGSM */
diff --git a/src/codec_hr.c b/src/codec_hr.c
index 26a75a5..d0e09ce 100644
--- a/src/codec_hr.c
+++ b/src/codec_hr.c
@@ -18,6 +18,7 @@
  */
 
 #include <gapk/codecs.h>
+#include <gapk/benchmark.h>
 
 #include "config.h"
 
@@ -43,14 +44,22 @@
 codec_hr_encode(void *_state, uint8_t *cod, const uint8_t *pcm)
 {
 	struct gsmhr *state = _state;
-	return gsmhr_encode(state, (int16_t *)cod, (const int16_t *)pcm);
+	int rc;
+	BENCHMARK_START;
+	rc = gsmhr_encode(state, (int16_t *)cod, (const int16_t *)pcm);
+	BENCHMARK_STOP(CODEC_HR, 1);
+	return rc;
 }
 
 static int
 codec_hr_decode(void *_state, uint8_t *pcm, const uint8_t *cod)
 {
 	struct gsmhr *state = _state;
-	return gsmhr_decode(state, (int16_t *)pcm, (const int16_t *)cod);
+	int rc;
+	BENCHMARK_START;
+	rc = gsmhr_decode(state, (int16_t *)pcm, (const int16_t *)cod);
+	BENCHMARK_STOP(CODEC_HR, 0);
+	return rc;
 }
 
 #endif /* HAVE_LIBGSMHR */
diff --git a/src/main.c b/src/main.c
index dfe46dd..900eadf 100644
--- a/src/main.c
+++ b/src/main.c
@@ -34,6 +34,7 @@
 #include <gapk/codecs.h>
 #include <gapk/formats.h>
 #include <gapk/procqueue.h>
+#include <gapk/benchmark.h>
 
 
 struct gapk_options
@@ -557,6 +558,8 @@
 
 	/* Release processing queue */
 	pq_destroy(gs->pq);
+
+	benchmark_dump();
 	
 	return rv;
 }