Transceiver52M: Separate architecture specific files

Move x86 specific files into their own directory as this
area is about to get crowded with the addition of ARM
support.

Signed-off-by: Thomas Tsou <tom@tsou.cc>
diff --git a/Transceiver52M/Makefile.am b/Transceiver52M/Makefile.am
index d002b04..c03116e 100644
--- a/Transceiver52M/Makefile.am
+++ b/Transceiver52M/Makefile.am
@@ -21,10 +21,13 @@
 
 include $(top_srcdir)/Makefile.common
 
-AM_CFLAGS = $(STD_DEFINES_AND_INCLUDES) -std=gnu99 -march=native
-AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES)
+AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) -I$(srcdir)/common
 AM_CXXFLAGS = -ldl -lpthread
 
+SUBDIRS = x86
+
+ARCH_LA = x86/libarch.la
+
 #UHD wins if both are defined
 if UHD
 AM_CPPFLAGS += $(UHD_CFLAGS)
@@ -52,9 +55,7 @@
 	radioClock.cpp \
 	sigProcLib.cpp \
 	Transceiver.cpp \
-	DummyLoad.cpp \
-	convolve.c \
-	convert.c
+	DummyLoad.cpp
 
 libtransceiver_la_SOURCES = \
 	$(COMMON_SOURCES) \
@@ -79,8 +80,8 @@
 	rcvLPF_651.h \
 	sendLPF_961.h \
 	Resampler.h \
-	convolve.h \
-	convert.h
+	common/convolve.h \
+	common/convert.h
 
 USRPping_SOURCES = USRPping.cpp
 USRPping_LDADD = \
@@ -90,12 +91,14 @@
 transceiver_SOURCES = runTransceiver.cpp
 transceiver_LDADD = \
 	libtransceiver.la \
+	$(ARCH_LA) \
 	$(GSM_LA) \
 	$(COMMON_LA) $(SQLITE_LA)
 
 sigProcLibTest_SOURCES = sigProcLibTest.cpp
 sigProcLibTest_LDADD = \
 	libtransceiver.la \
+	$(ARCH_LA) \
 	$(GSM_LA) \
 	$(COMMON_LA) $(SQLITE_LA)
 
diff --git a/Transceiver52M/convert.h b/Transceiver52M/common/convert.h
similarity index 100%
rename from Transceiver52M/convert.h
rename to Transceiver52M/common/convert.h
diff --git a/Transceiver52M/convolve.h b/Transceiver52M/common/convolve.h
similarity index 100%
rename from Transceiver52M/convolve.h
rename to Transceiver52M/common/convolve.h
diff --git a/Transceiver52M/common/convolve_base.c b/Transceiver52M/common/convolve_base.c
new file mode 100644
index 0000000..41dba1c
--- /dev/null
+++ b/Transceiver52M/common/convolve_base.c
@@ -0,0 +1,156 @@
+/*
+ * Convolution
+ * Copyright (C) 2012, 2013 Thomas Tsou <tom@tsou.cc>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <malloc.h>
+#include <string.h>
+#include <stdio.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* Base multiply and accumulate complex-real: y += x * h[0]; x, y are 2-float samples, h[1] is not read */
+static void mac_real(float *x, float *h, float *y)
+{
+	y[0] += x[0] * h[0];
+	y[1] += x[1] * h[0];
+}
+
+/* Base multiply and accumulate complex-complex: y += x * h, each operand held as a (re, im) float pair */
+static void mac_cmplx(float *x, float *h, float *y)
+{
+	y[0] += x[0] * h[0] - x[1] * h[1];
+	y[1] += x[0] * h[1] + x[1] * h[0];
+}
+
+/* Base vector complex-real multiply and accumulate into y over pairs i = offset, offset + step, ... < len */
+static void mac_real_vec_n(float *x, float *h, float *y,
+			   int len, int step, int offset)
+{
+	for (int i = offset; i < len; i += step)
+		mac_real(&x[2 * i], &h[2 * i], y);
+}
+
+/* Base vector complex-complex multiply and accumulate into y over pairs i = offset, offset + step, ... < len */
+static void mac_cmplx_vec_n(float *x, float *h, float *y,
+			    int len, int step, int offset)
+{
+	for (int i = offset; i < len; i += step)
+		mac_cmplx(&x[2 * i], &h[2 * i], y);
+}
+
+/* Base complex-real convolution: no argument checks, accumulates into y without clearing it, returns len */
+int _base_convolve_real(float *x, int x_len,
+			float *h, int h_len,
+			float *y, int y_len,
+			int start, int len,
+			int step, int offset)
+{
+	for (int i = 0; i < len; i++) {
+		mac_real_vec_n(&x[2 * (i - (h_len - 1) + start)],
+			       h,
+			       &y[2 * i], h_len,
+			       step, offset);
+	}
+
+	return len;
+}
+
+/* Base complex-complex convolution: no argument checks, accumulates into y without clearing it, returns len */
+int _base_convolve_complex(float *x, int x_len,
+			   float *h, int h_len,
+			   float *y, int y_len,
+			   int start, int len,
+			   int step, int offset)
+{
+	for (int i = 0; i < len; i++) {
+		mac_cmplx_vec_n(&x[2 * (i - (h_len - 1) + start)],
+				h,
+				&y[2 * i],
+				h_len, step, offset);
+	}
+
+	return len;
+}
+
+/* Buffer validity checks: returns 0 when the lengths pass, or -1 after printing the reason to stderr */
+int bounds_check(int x_len, int h_len, int y_len,
+		 int start, int len, int step)
+{
+	if ((x_len < 1) || (h_len < 1) ||
+	    (y_len < 1) || (len < 1) || (step < 1)) {
+		fprintf(stderr, "Convolve: Invalid input\n");
+		return -1;
+	}
+
+	if ((start + len > x_len) || (len > y_len) || (x_len < h_len)) {
+		fprintf(stderr, "Convolve: Boundary exception\n");
+		fprintf(stderr, "start: %i, len: %i, x: %i, h: %i, y: %i\n",
+				start, len, x_len, h_len, y_len);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* API: Non-aligned (no SSE) complex-real; zeroes the first len outputs of y, returns len or -1 on bad bounds */
+int base_convolve_real(float *x, int x_len,
+		       float *h, int h_len,
+		       float *y, int y_len,
+		       int start, int len,
+		       int step, int offset)
+{
+	if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
+		return -1;
+
+	memset(y, 0, len * 2 * sizeof(float));
+
+	return _base_convolve_real(x, x_len,
+				   h, h_len,
+				   y, y_len,
+				   start, len, step, offset);
+}
+
+/* API: Non-aligned (no SSE) complex-complex; zeroes the first len outputs of y, returns len or -1 on bad bounds */
+int base_convolve_complex(float *x, int x_len,
+			  float *h, int h_len,
+			  float *y, int y_len,
+			  int start, int len,
+			  int step, int offset)
+{
+	if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
+		return -1;
+
+	memset(y, 0, len * 2 * sizeof(float));
+
+	return _base_convolve_complex(x, x_len,
+				      h, h_len,
+				      y, y_len,
+				      start, len, step, offset);
+}
+
+/* Filter tap allocation for len 2-float taps: 16-byte aligned when HAVE_SSE3 is defined, plain malloc otherwise */
+void *convolve_h_alloc(int len)
+{
+#ifdef HAVE_SSE3
+	return memalign(16, len * 2 * sizeof(float));
+#else
+	return malloc(len * 2 * sizeof(float));
+#endif
+}
diff --git a/Transceiver52M/x86/Makefile.am b/Transceiver52M/x86/Makefile.am
new file mode 100644
index 0000000..0621b17
--- /dev/null
+++ b/Transceiver52M/x86/Makefile.am
@@ -0,0 +1,8 @@
+AM_CFLAGS = -Wall -std=gnu99 -march=native -I$(srcdir)/../common
+
+noinst_LTLIBRARIES = libarch.la
+
+libarch_la_SOURCES = \
+	../common/convolve_base.c \
+	convert.c \
+	convolve.c
diff --git a/Transceiver52M/convert.c b/Transceiver52M/x86/convert.c
similarity index 98%
rename from Transceiver52M/convert.c
rename to Transceiver52M/x86/convert.c
index dc5e748..1d2f208 100644
--- a/Transceiver52M/convert.c
+++ b/Transceiver52M/x86/convert.c
@@ -19,6 +19,7 @@
 
 #include <malloc.h>
 #include <string.h>
+#include "convert.h"
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -164,7 +165,7 @@
 }
 #endif
 
-#ifndef HAVE_SSE_4_1
+#ifndef HAVE_SSE3
 static void convert_si16_ps(float *out, short *in, int len)
 {
 	for (int i = 0; i < len; i++)
diff --git a/Transceiver52M/convolve.c b/Transceiver52M/x86/convolve.c
similarity index 83%
rename from Transceiver52M/convolve.c
rename to Transceiver52M/x86/convolve.c
index 6f48ea0..ed85d97 100644
--- a/Transceiver52M/convolve.c
+++ b/Transceiver52M/x86/convolve.c
@@ -20,11 +20,28 @@
 #include <malloc.h>
 #include <string.h>
 #include <stdio.h>
+#include "convolve.h"
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
+/* Forward declarations from base implementation */
+int _base_convolve_real(float *x, int x_len,
+			float *h, int h_len,
+			float *y, int y_len,
+			int start, int len,
+			int step, int offset);
+
+int _base_convolve_complex(float *x, int x_len,
+			   float *h, int h_len,
+			   float *y, int y_len,
+			   int start, int len,
+			   int step, int offset);
+
+int bounds_check(int x_len, int h_len, int y_len,
+		 int start, int len, int step);
+
 #ifdef HAVE_SSE3
 #include <xmmintrin.h>
 #include <pmmintrin.h>
@@ -493,90 +510,6 @@
 }
 #endif
 
-/* Base multiply and accumulate complex-real */
-static void mac_real(float *x, float *h, float *y)
-{
-	y[0] += x[0] * h[0];
-	y[1] += x[1] * h[0];
-}
-
-/* Base multiply and accumulate complex-complex */
-static void mac_cmplx(float *x, float *h, float *y)
-{
-	y[0] += x[0] * h[0] - x[1] * h[1];
-	y[1] += x[0] * h[1] + x[1] * h[0];
-}
-
-/* Base vector complex-complex multiply and accumulate */
-static void mac_real_vec_n(float *x, float *h, float *y,
-			   int len, int step, int offset)
-{
-	for (int i = offset; i < len; i += step)
-		mac_real(&x[2 * i], &h[2 * i], y);
-}
-
-/* Base vector complex-complex multiply and accumulate */
-static void mac_cmplx_vec_n(float *x, float *h, float *y,
-			    int len, int step, int offset)
-{
-	for (int i = offset; i < len; i += step)
-		mac_cmplx(&x[2 * i], &h[2 * i], y);
-}
-
-/* Base complex-real convolution */
-static int _base_convolve_real(float *x, int x_len,
-			       float *h, int h_len,
-			       float *y, int y_len,
-			       int start, int len,
-			       int step, int offset)
-{
-	for (int i = 0; i < len; i++) {
-		mac_real_vec_n(&x[2 * (i - (h_len - 1) + start)],
-			       h,
-			       &y[2 * i], h_len,
-			       step, offset);
-	}
-
-	return len;
-}
-
-/* Base complex-complex convolution */
-static int _base_convolve_complex(float *x, int x_len,
-				  float *h, int h_len,
-				  float *y, int y_len,
-				  int start, int len,
-				  int step, int offset)
-{
-	for (int i = 0; i < len; i++) {
-		mac_cmplx_vec_n(&x[2 * (i - (h_len - 1) + start)],
-				h,
-				&y[2 * i],
-				h_len, step, offset);
-	}
-
-	return len;
-}
-
-/* Buffer validity checks */
-static int bounds_check(int x_len, int h_len, int y_len,
-			int start, int len, int step)
-{
-	if ((x_len < 1) || (h_len < 1) ||
-	    (y_len < 1) || (len < 1) || (step < 1)) {
-		fprintf(stderr, "Convolve: Invalid input\n");
-		return -1;
-	}
-
-	if ((start + len > x_len) || (len > y_len) || (x_len < h_len)) {
-		fprintf(stderr, "Convolve: Boundary exception\n");
-		fprintf(stderr, "start: %i, len: %i, x: %i, h: %i, y: %i\n",
-				start, len, x_len, h_len, y_len);
-		return -1;
-	}
-
-	return 0;
-}
-
 /* API: Aligned complex-real */
 int convolve_real(float *x, int x_len,
 		  float *h, int h_len,
@@ -666,49 +599,3 @@
 
 	return len;
 }
-
-/* API: Non-aligned (no SSE) complex-real */
-int base_convolve_real(float *x, int x_len,
-		       float *h, int h_len,
-		       float *y, int y_len,
-		       int start, int len,
-		       int step, int offset)
-{
-	if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
-		return -1;
-
-	memset(y, 0, len * 2 * sizeof(float));
-
-	return _base_convolve_real(x, x_len,
-				   h, h_len,
-				   y, y_len,
-				   start, len, step, offset);
-}
-
-/* API: Non-aligned (no SSE) complex-complex */
-int base_convolve_complex(float *x, int x_len,
-			  float *h, int h_len,
-			  float *y, int y_len,
-			  int start, int len,
-			  int step, int offset)
-{
-	if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
-		return -1;
-
-	memset(y, 0, len * 2 * sizeof(float));
-
-	return _base_convolve_complex(x, x_len,
-				      h, h_len,
-				      y, y_len,
-				      start, len, step, offset);
-}
-
-/* Aligned filter tap allocation */
-void *convolve_h_alloc(int len)
-{
-#ifdef HAVE_SSE3
-	return memalign(16, len * 2 * sizeof(float));
-#else
-	return malloc(len * 2 * sizeof(float));
-#endif
-}
diff --git a/configure.ac b/configure.ac
index 42fcde6..2bfe825 100644
--- a/configure.ac
+++ b/configure.ac
@@ -101,6 +101,7 @@
     CommonLibs/Makefile \
     GSM/Makefile \
     Transceiver52M/Makefile \
+    Transceiver52M/x86/Makefile \
     sqlite3/Makefile \
 ])