Make run faster on ARM CPUs using fast math approximation

Use --fast-math to use sine/cosine tables and approximate atan2.
This commit is contained in:
Andreas Eversberg
2018-11-10 15:16:20 +01:00
parent e8429166c3
commit 32025915d5
17 changed files with 386 additions and 194 deletions

View File

@@ -26,13 +26,56 @@
#include "../libsample/sample.h"
#include "fm.h"
//#define FAST_SINE
static int has_init = 0;
static int fast_math = 0;
static float *sin_tab = NULL, *cos_tab = NULL;
/* global init */
int fm_init(int _fast_math)
{
fast_math = _fast_math;
if (fast_math) {
int i;
sin_tab = calloc(65536+16384, sizeof(*sin_tab));
if (!sin_tab) {
fprintf(stderr, "No mem!\n");
return -ENOMEM;
}
cos_tab = sin_tab + 16384;
/* generate sine and cosine */
for (i = 0; i < 65536+16384; i++)
sin_tab[i] = sin(2.0 * M_PI * (double)i / 65536.0);
}
has_init = 1;
return 0;
}
/* global exit */
void fm_exit(void)
{
if (sin_tab) {
free(sin_tab);
sin_tab = cos_tab = NULL;
}
has_init = 0;
}
/* init FM modulator */
int fm_mod_init(fm_mod_t *mod, double samplerate, double offset, double amplitude)
{
int i;
if (!has_init) {
fprintf(stderr, "libfm was not initialized, plese fix!\n");
abort();
}
memset(mod, 0, sizeof(*mod));
mod->samplerate = samplerate;
mod->offset = offset;
@@ -51,19 +94,6 @@ int fm_mod_init(fm_mod_t *mod, double samplerate, double offset, double amplitud
mod->ramp_tab[i] = 0.5 - cos(M_PI * i / mod->ramp_length) / 2.0;
mod->ramp_tab[0] = mod->ramp_tab[1] / 2.0; /* never be 0 */
#ifdef FAST_SINE
mod->sin_tab = calloc(65536+16384, sizeof(*mod->sin_tab));
if (!mod->sin_tab) {
fprintf(stderr, "No mem!\n");
fm_mod_exit(mod);
return -ENOMEM;
}
/* generate sine and cosine */
for (i = 0; i < 65536+16384; i++)
mod->sin_tab[i] = sin(2.0 * M_PI * (double)i / 65536.0) * amplitude;
#endif
return 0;
}
@@ -73,10 +103,6 @@ void fm_mod_exit(fm_mod_t *mod)
free(mod->ramp_tab);
mod->ramp_tab = NULL;
}
if (mod->sin_tab) {
free(mod->sin_tab);
mod->sin_tab = NULL;
}
}
/* do frequency modulation of samples and add them to existing baseband */
@@ -85,11 +111,7 @@ void fm_modulate_complex(fm_mod_t *mod, sample_t *frequency, uint8_t *power, int
double dev, rate, phase, offset;
int ramp, ramp_length;
double *ramp_tab;
#ifdef FAST_SINE
double *sin_tab, *cos_tab;
#else
double amplitude;
#endif
rate = mod->samplerate;
phase = mod->phase;
@@ -97,12 +119,7 @@ void fm_modulate_complex(fm_mod_t *mod, sample_t *frequency, uint8_t *power, int
ramp = mod->ramp;
ramp_length = mod->ramp_length;
ramp_tab = mod->ramp_tab;
#ifdef FAST_SINE
sin_tab = mod->sin_tab;
cos_tab = mod->sin_tab + 16384;
#else
amplitude = mod->amplitude;
#endif
again:
switch (mod->state) {
@@ -118,23 +135,23 @@ again:
dev = offset + *frequency++;
power++;
length--;
#ifdef FAST_SINE
phase += 65536.0 * dev / rate;
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
*baseband++ += cos_tab[(uint16_t)phase];
*baseband++ += sin_tab[(uint16_t)phase];
#else
phase += 2.0 * M_PI * dev / rate;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
*baseband++ += cos(phase) * amplitude;
*baseband++ += sin(phase) * amplitude;
#endif
if (fast_math) {
phase += 65536.0 * dev / rate;
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
*baseband++ += cos_tab[(uint16_t)phase] * amplitude;
*baseband++ += sin_tab[(uint16_t)phase] * amplitude;
} else {
phase += 2.0 * M_PI * dev / rate;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
*baseband++ += cos(phase) * amplitude;
*baseband++ += sin(phase) * amplitude;
}
}
break;
case MOD_STATE_RAMP_DOWN:
@@ -151,23 +168,23 @@ again:
dev = offset + *frequency++;
power++;
length--;
#ifdef FAST_SINE
phase += 65536.0 * dev / rate;
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
*baseband++ += cos_tab[(uint16_t)phase] * ramp_tab[ramp];
*baseband++ += sin_tab[(uint16_t)phase] * ramp_tab[ramp];
#else
phase += 2.0 * M_PI * dev / rate;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
*baseband++ += cos(phase) * amplitude * ramp_tab[ramp];
*baseband++ += sin(phase) * amplitude * ramp_tab[ramp];
#endif
if (fast_math) {
phase += 65536.0 * dev / rate;
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
*baseband++ += cos_tab[(uint16_t)phase] * amplitude * ramp_tab[ramp];
*baseband++ += sin_tab[(uint16_t)phase] * amplitude * ramp_tab[ramp];
} else {
phase += 2.0 * M_PI * dev / rate;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
*baseband++ += cos(phase) * amplitude * ramp_tab[ramp];
*baseband++ += sin(phase) * amplitude * ramp_tab[ramp];
}
ramp--;
}
break;
@@ -186,23 +203,23 @@ again:
* we still continue with a carrier, but it has very low amplitude.
* the low amplitude is set in ramp_tab[0]
*/
#ifdef FAST_SINE
phase += 65536.0 * dev / rate;
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
*baseband++ += cos_tab[(uint16_t)phase] * ramp_tab[ramp];
*baseband++ += sin_tab[(uint16_t)phase] * ramp_tab[ramp];
#else
phase += 2.0 * M_PI * dev / rate;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
*baseband++ += cos(phase) * amplitude * ramp_tab[ramp];
*baseband++ += sin(phase) * amplitude * ramp_tab[ramp];
#endif
if (fast_math) {
phase += 65536.0 * dev / rate;
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
*baseband++ += cos_tab[(uint16_t)phase] * amplitude * ramp_tab[ramp];
*baseband++ += sin_tab[(uint16_t)phase] * amplitude * ramp_tab[ramp];
} else {
phase += 2.0 * M_PI * dev / rate;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
*baseband++ += cos(phase) * amplitude * ramp_tab[ramp];
*baseband++ += sin(phase) * amplitude * ramp_tab[ramp];
}
}
break;
case MOD_STATE_RAMP_UP:
@@ -220,23 +237,23 @@ again:
dev = offset + *frequency++;
power++;
length--;
#ifdef FAST_SINE
phase += 65536.0 * dev / rate;
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
*baseband++ += cos_tab[(uint16_t)phase] * ramp_tab[ramp];
*baseband++ += sin_tab[(uint16_t)phase] * ramp_tab[ramp];
#else
phase += 2.0 * M_PI * dev / rate;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
*baseband++ += cos(phase) * amplitude * ramp_tab[ramp];
*baseband++ += sin(phase) * amplitude * ramp_tab[ramp];
#endif
if (fast_math) {
phase += 65536.0 * dev / rate;
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
*baseband++ += cos_tab[(uint16_t)phase] * amplitude * ramp_tab[ramp];
*baseband++ += sin_tab[(uint16_t)phase] * amplitude * ramp_tab[ramp];
} else {
phase += 2.0 * M_PI * dev / rate;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
*baseband++ += cos(phase) * amplitude * ramp_tab[ramp];
*baseband++ += sin(phase) * amplitude * ramp_tab[ramp];
}
ramp++;
}
break;
@@ -251,41 +268,62 @@ again:
/* init FM demodulator */
int fm_demod_init(fm_demod_t *demod, double samplerate, double offset, double bandwidth)
{
if (!has_init) {
fprintf(stderr, "libfm was not initialized, plese fix!\n");
abort();
}
memset(demod, 0, sizeof(*demod));
demod->samplerate = samplerate;
#ifdef FAST_SINE
demod->rot = 65536.0 * -offset / samplerate;
#else
demod->rot = 2 * M_PI * -offset / samplerate;
#endif
if (fast_math)
demod->rot = 65536.0 * -offset / samplerate;
else
demod->rot = 2 * M_PI * -offset / samplerate;
/* use fourth order (2 iter) filter, since it is as fast as second order (1 iter) filter */
iir_lowpass_init(&demod->lp[0], bandwidth / 2.0, samplerate, 2);
iir_lowpass_init(&demod->lp[1], bandwidth / 2.0, samplerate, 2);
#ifdef FAST_SINE
int i;
demod->sin_tab = calloc(65536+16384, sizeof(*demod->sin_tab));
if (!demod->sin_tab) {
fprintf(stderr, "No mem!\n");
return -ENOMEM;
}
/* generate sine and cosine */
for (i = 0; i < 65536+16384; i++)
demod->sin_tab[i] = sin(2.0 * M_PI * (double)i / 65536.0);
#endif
return 0;
}
void fm_demod_exit(fm_demod_t *demod)
void fm_demod_exit(fm_demod_t __attribute__ ((unused)) *demod)
{
if (demod->sin_tab) {
free(demod->sin_tab);
demod->sin_tab = NULL;
}
static inline float fast_tan(float z)
{
const float n1 = 0.97239411f;
const float n2 = -0.19194795f;
return (n1 + n2 * z * z) * z;
}
static inline float fast_atan2(float y, float x)
{
if (x != 0.0) {
if (fabsf(x) > fabsf(y)) {
const float z = y / x;
if (x > 0.0) /* atan2(y,x) = atan(y/x) if x > 0 */
return fast_tan(z);
else if (y >= 0.0) /* atan2(y,x) = atan(y/x) + PI if x < 0, y >= 0 */
return fast_tan(z) + M_PI;
else /* atan2(y,x) = atan(y/x) - PI if x < 0, y < 0 */
return fast_tan(z) - M_PI;
} else { /* Use property atan(y/x) = PI/2 - atan(x/y) if |y/x| > 1 */
const float z = x / y;
if (y > 0.0) /* atan2(y,x) = PI/2 - atan(x/y) if |y/x| > 1, y > 0 */
return -fast_tan(z) + M_PI_2;
else /* atan2(y,x) = -PI/2 - atan(x/y) if |y/x| > 1, y < 0 */
return -fast_tan(z) - M_PI_2;
}
} else {
if (y > 0.0) /* x = 0, y > 0 */
return M_PI_2;
else if (y < 0.0) /* x = 0, y < 0 */
return -M_PI_2;
}
return 0.0; /* x,y = 0. return 0, because NaN would harm further processing */
}
/* do frequency demodulation of baseband and write them to samples */
@@ -295,36 +333,29 @@ void fm_demodulate_complex(fm_demod_t *demod, sample_t *frequency, int length, f
double _sin, _cos;
sample_t i, q;
int s, ss;
#ifdef FAST_SINE
double *sin_tab, *cos_tab;
#endif
rate = demod->samplerate;
phase = demod->phase;
rot = demod->rot;
#ifdef FAST_SINE
sin_tab = demod->sin_tab;
cos_tab = demod->sin_tab + 16384;
#endif
for (s = 0, ss = 0; s < length; s++) {
phase += rot;
i = baseband[ss++];
q = baseband[ss++];
#ifdef FAST_SINE
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
_sin = sin_tab[(uint16_t)phase];
_cos = cos_tab[(uint16_t)phase];
#else
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
_sin = sin(phase);
_cos = cos(phase);
#endif
if (fast_math) {
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
_sin = sin_tab[(uint16_t)phase];
_cos = cos_tab[(uint16_t)phase];
} else {
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
_sin = sin(phase);
_cos = cos(phase);
}
I[s] = i * _cos - q * _sin;
Q[s] = i * _sin + q * _cos;
}
@@ -333,7 +364,10 @@ void fm_demodulate_complex(fm_demod_t *demod, sample_t *frequency, int length, f
iir_process(&demod->lp[1], Q, length);
last_phase = demod->last_phase;
for (s = 0; s < length; s++) {
phase = atan2(Q[s], I[s]);
if (fast_math)
phase = fast_atan2(Q[s], I[s]);
else
phase = atan2(Q[s], I[s]);
dev = (phase - last_phase) / 2 / M_PI;
last_phase = phase;
if (dev < -0.49)
@@ -352,35 +386,28 @@ void fm_demodulate_real(fm_demod_t *demod, sample_t *frequency, int length, samp
double _sin, _cos;
sample_t i;
int s, ss;
#ifdef FAST_SINE
double *sin_tab, *cos_tab;
#endif
rate = demod->samplerate;
phase = demod->phase;
rot = demod->rot;
#ifdef FAST_SINE
sin_tab = demod->sin_tab;
cos_tab = demod->sin_tab + 16384;
#endif
for (s = 0, ss = 0; s < length; s++) {
phase += rot;
i = baseband[ss++];
#ifdef FAST_SINE
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
_sin = sin_tab[(uint16_t)phase];
_cos = cos_tab[(uint16_t)phase];
#else
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
_sin = sin(phase);
_cos = cos(phase);
#endif
if (fast_math) {
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
_sin = sin_tab[(uint16_t)phase];
_cos = cos_tab[(uint16_t)phase];
} else {
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
_sin = sin(phase);
_cos = cos(phase);
}
I[s] = i * _cos;
Q[s] = i * _sin;
}
@@ -389,7 +416,10 @@ void fm_demodulate_real(fm_demod_t *demod, sample_t *frequency, int length, samp
iir_process(&demod->lp[1], Q, length);
last_phase = demod->last_phase;
for (s = 0; s < length; s++) {
phase = atan2(Q[s], I[s]);
if (fast_math)
phase = fast_atan2(Q[s], I[s]);
else
phase = atan2(Q[s], I[s]);
dev = (phase - last_phase) / 2 / M_PI;
last_phase = phase;
if (dev < -0.49)

View File

@@ -1,5 +1,8 @@
#include "../libfilter/iir_filter.h"
int fm_init(int fast_math);
void fm_exit(void);
enum fm_mod_state {
MOD_STATE_OFF, /* transmitter off, no IQ vector */
MOD_STATE_ON, /* transmitter on, FM modulated IQ vector */
@@ -12,7 +15,6 @@ typedef struct fm_mod {
double offset; /* offset to calculated center frequency */
double amplitude; /* how much amplitude to add to the buff */
double phase; /* current phase of FM (used to shift and modulate ) */
double *sin_tab; /* sine/cosine table for modulation */
enum fm_mod_state state;/* state of transmit power */
double *ramp_tab; /* half cosine ramp up */
int ramp; /* current ramp position */
@@ -29,7 +31,6 @@ typedef struct fm_demod {
double rot; /* rotation step per sample to shift rx frequency (used to shift) */
double last_phase; /* last phase of FM (used to demodulate) */
iir_filter_t lp[2]; /* filters received IQ signal */
double *sin_tab; /* sine/cosine table rotation */
} fm_demod_t;
int fm_demod_init(fm_demod_t *demod, double samplerate, double offset, double bandwidth);