Make run faster on ARM CPUs using fast math approximation

Use --fast-math to use sine/cosine tables and approximate atan2.
2018-11-10 15:16:20 +01:00
parent e8429166c3
commit 32025915d5
17 changed files with 386 additions and 194 deletions
--- a/src/libam/am.c
+++ b/src/libam/am.c
@@ -17,12 +17,55 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include <stdio.h>
 #include <stdint.h>
+#include <stdlib.h>
 #include <string.h>
+#include <errno.h>
 #include <math.h>
 #include "../libsample/sample.h"
 #include "am.h"

+static int has_init = 0;
+static int fast_math = 0;
+static float *sin_tab = NULL, *cos_tab = NULL;
+
+/* global init */
+int am_init(int _fast_math)
+{
+	fast_math = _fast_math;
+
+	if (fast_math) {
+		int i;
+
+		sin_tab = calloc(65536+16384, sizeof(*sin_tab));
+		if (!sin_tab) {
+			fprintf(stderr, "No mem!\n");
+			return -ENOMEM;
+		}
+		cos_tab = sin_tab + 16384;
+
+		/* generate sine and cosine */
+		for (i = 0; i < 65536+16384; i++)
+			sin_tab[i] = sin(2.0 * M_PI * (double)i / 65536.0);
+	}
+
+	has_init = 1;
+
+	return 0;
+}
+
+/* global exit */
+void am_exit(void)
+{
+	if (sin_tab) {
+		free(sin_tab);
+		sin_tab = cos_tab = NULL;
+	}
+
+	has_init = 0;
+}
+
 #define CARRIER_FILTER 30.0

 /* Amplitude modulation in SDR:
@@ -36,7 +79,10 @@ int am_mod_init(am_mod_t *mod, double samplerate, double offset, double gain, do
 	memset(mod, 0, sizeof(*mod));
 	mod->gain = gain;
 	mod->bias = bias;
-	mod->phasestep = 2.0 * M_PI * offset / samplerate;
+	if (fast_math)
+		mod->rot = 65536.0 * offset / samplerate;
+	else
+		mod->rot = 2.0 * M_PI * offset / samplerate;

 	return 0;
 }
@@ -49,20 +95,30 @@ void am_modulate_complex(am_mod_t *mod, sample_t *amplitude, int num, float *bas
 {
 	int s;
 	double vector;
-	double phasestep = mod->phasestep;
+	double rot = mod->rot;
 	double phase = mod->phase;
 	double gain = mod->gain;
 	double bias = mod->bias;

 	for (s = 0; s < num; s++) {
 		vector = *amplitude++ * gain + bias;
-		*baseband++ = cos(phase) * vector;
-		*baseband++ = sin(phase) * vector;
-		phase += phasestep;
-		if (phase < 0.0)
-			phase += 2.0 * M_PI;
-		else if (phase >= 2.0 * M_PI)
-			phase -= 2.0 * M_PI;
+		if (fast_math) {
+			*baseband++ += cos_tab[(uint16_t)phase] * vector;
+			*baseband++ += sin_tab[(uint16_t)phase] * vector;
+			phase += rot;
+			if (phase < 0.0)
+				phase += 65536.0;
+			else if (phase >= 65536.0)
+				phase -= 65536.0;
+		} else {
+			*baseband++ = cos(phase) * vector;
+			*baseband++ = sin(phase) * vector;
+			phase += rot;
+			if (phase < 0.0)
+				phase += 2.0 * M_PI;
+			else if (phase >= 2.0 * M_PI)
+				phase -= 2.0 * M_PI;
+		}
 	}

 	mod->phase = phase;
@@ -73,7 +129,10 @@ int am_demod_init(am_demod_t *demod, double samplerate, double offset, double ba
 {
 	memset(demod, 0, sizeof(*demod));
 	demod->gain = gain;
-	demod->phasestep = 2 * M_PI * -offset / samplerate;
+	if (fast_math)
+		demod->rot = 65536.0 * -offset / samplerate;
+	else
+		demod->rot = 2 * M_PI * -offset / samplerate;

 	/* use fourth order (2 iter) filter, since it is as fast as second order (1 iter) filter */
 	iir_lowpass_init(&demod->lp[0], bandwidth, samplerate, 2);
@@ -93,7 +152,7 @@ void am_demod_exit(am_demod_t __attribute__((unused)) *demod)
 void am_demodulate_complex(am_demod_t *demod, sample_t *amplitude, int length, float *baseband, sample_t *I, sample_t *Q, sample_t *carrier)
 {
 	int s, ss;
-	double phasestep = demod->phasestep;
+	double rot = demod->rot;
 	double phase = demod->phase;
 	double gain = demod->gain;
 	double i, q;
@@ -103,13 +162,22 @@ void am_demodulate_complex(am_demod_t *demod, sample_t *amplitude, int length, f
 	for (s = 0, ss = 0; s < length; s++) {
 		i = baseband[ss++];
 		q = baseband[ss++];
-		_sin = sin(phase);
-		_cos = cos(phase);
-		phase += phasestep;
-		if (phase < 0.0)
-			phase += 2.0 * M_PI;
-		else if (phase >= 2.0 * M_PI)
-			phase -= 2.0 * M_PI;
+		phase += rot;
+		if (fast_math) {
+			if (phase < 0.0)
+				phase += 65536.0;
+			else if (phase >= 65536.0)
+				phase -= 65536.0;
+			_sin = sin_tab[(uint16_t)phase];
+			_cos = cos_tab[(uint16_t)phase];
+		} else {
+			if (phase < 0.0)
+				phase += 2.0 * M_PI;
+			else if (phase >= 2.0 * M_PI)
+				phase -= 2.0 * M_PI;
+			_sin = sin(phase);
+			_cos = cos(phase);
+		}
 		I[s] = i * _cos - q * _sin;
 		Q[s] = i * _sin + q * _cos;
 	}
--- a/src/libam/am.h
+++ b/src/libam/am.h
@@ -1,7 +1,10 @@
 #include "../libfilter/iir_filter.h"

+int am_init(int fast_math);
+void am_exit(void);
+
 typedef struct am_mod {
-	double	phasestep;	/* angle to rotate vector per sample */
+	double	rot;		/* angle to rotate vector per sample */
 	double	phase;		/* current phase */
 	double	gain;		/* gain to be multiplied to amplitude */
 	double	bias;		/* DC offset to add (carrier amplitude) */
@@ -12,9 +15,8 @@ void am_mod_exit(am_mod_t *mod);
 void am_modulate_complex(am_mod_t *mod, sample_t *amplitude, int num, float *baseband);

 typedef struct am_demod {
-	double	phasestep;	/* angle to rotate vector per sample */
+	double	rot;		/* angle to rotate vector per sample */
 	double	phase;		/* current rotation phase (used to shift) */
-	double	last_phase;	/* last phase of FM (used to demodulate) */
 	iir_filter_t lp[3];	/* filters received IQ signal/carrier */
 	double	gain;		/* gain to be expected from amplitude */
 	double	bias;		/* DC offset to be expected (carrier amplitude) */