Make the software run faster on ARM CPUs by using fast math approximations

Pass --fast-math to enable sine/cosine lookup tables and an approximate atan2.
This commit is contained in:
Andreas Eversberg
2018-11-10 15:16:20 +01:00
parent e8429166c3
commit 32025915d5
17 changed files with 386 additions and 194 deletions

View File

@@ -17,12 +17,55 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <math.h>
#include "../libsample/sample.h"
#include "am.h"
/* set to 1 by am_init() and back to 0 by am_exit() */
static int has_init = 0;
/* copy of the flag passed to am_init(); non-zero selects the sin_tab/cos_tab
 * lookups instead of calling sin()/cos() */
static int fast_math = 0;
/* sin_tab: allocated by am_init() with 65536+16384 entries, where entry i is
 * sin(2*pi*i/65536), i.e. one full period plus an extra quarter period.
 * cos_tab: points 16384 entries (a quarter period) into sin_tab, so it is not
 * a separate allocation. */
static float *sin_tab = NULL, *cos_tab = NULL;
/*
 * Global init: select precise or fast math for this module.
 *
 * _fast_math: non-zero to use a precomputed sine/cosine table instead of
 *             calling sin()/cos().
 *
 * Returns 0 on success or -ENOMEM if the table cannot be allocated. On
 * failure the module state is left exactly as it was before the call, so
 * fast_math is never set without a valid table behind it.
 *
 * May be called again to switch modes; a table from an earlier call is
 * released instead of being leaked.
 */
int am_init(int _fast_math)
{
	/* one full period in 65536 steps, so a uint16_t phase indexes the table */
	enum { TAB_PERIOD = 65536, TAB_QUARTER = TAB_PERIOD / 4 };
	float *tab = NULL;
	int i;

	if (_fast_math) {
		/* the extra quarter period lets cos_tab be an offset view of sin_tab */
		tab = calloc(TAB_PERIOD + TAB_QUARTER, sizeof(*tab));
		if (!tab) {
			fprintf(stderr, "No mem!\n");
			return -ENOMEM;
		}
		/* generate sine and cosine */
		for (i = 0; i < TAB_PERIOD + TAB_QUARTER; i++)
			tab[i] = sin(2.0 * M_PI * (double)i / (double)TAB_PERIOD);
	}

	/* commit the new state only after everything succeeded */
	free(sin_tab);
	sin_tab = tab;
	cos_tab = tab ? tab + TAB_QUARTER : NULL;
	fast_math = _fast_math;
	has_init = 1;

	return 0;
}
/* global exit */
void am_exit(void)
{
if (sin_tab) {
free(sin_tab);
sin_tab = cos_tab = NULL;
}
has_init = 0;
}
#define CARRIER_FILTER 30.0
/* Amplitude modulation in SDR:
@@ -36,7 +79,10 @@ int am_mod_init(am_mod_t *mod, double samplerate, double offset, double gain, do
memset(mod, 0, sizeof(*mod));
mod->gain = gain;
mod->bias = bias;
mod->phasestep = 2.0 * M_PI * offset / samplerate;
if (fast_math)
mod->rot = 65536.0 * offset / samplerate;
else
mod->rot = 2.0 * M_PI * offset / samplerate;
return 0;
}
@@ -49,20 +95,30 @@ void am_modulate_complex(am_mod_t *mod, sample_t *amplitude, int num, float *bas
{
int s;
double vector;
double phasestep = mod->phasestep;
double rot = mod->rot;
double phase = mod->phase;
double gain = mod->gain;
double bias = mod->bias;
for (s = 0; s < num; s++) {
vector = *amplitude++ * gain + bias;
*baseband++ = cos(phase) * vector;
*baseband++ = sin(phase) * vector;
phase += phasestep;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
if (fast_math) {
*baseband++ += cos_tab[(uint16_t)phase] * vector;
*baseband++ += sin_tab[(uint16_t)phase] * vector;
phase += rot;
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
} else {
*baseband++ = cos(phase) * vector;
*baseband++ = sin(phase) * vector;
phase += rot;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
}
}
mod->phase = phase;
@@ -73,7 +129,10 @@ int am_demod_init(am_demod_t *demod, double samplerate, double offset, double ba
{
memset(demod, 0, sizeof(*demod));
demod->gain = gain;
demod->phasestep = 2 * M_PI * -offset / samplerate;
if (fast_math)
demod->rot = 65536.0 * -offset / samplerate;
else
demod->rot = 2 * M_PI * -offset / samplerate;
/* use fourth order (2 iter) filter, since it is as fast as second order (1 iter) filter */
iir_lowpass_init(&demod->lp[0], bandwidth, samplerate, 2);
@@ -93,7 +152,7 @@ void am_demod_exit(am_demod_t __attribute__((unused)) *demod)
void am_demodulate_complex(am_demod_t *demod, sample_t *amplitude, int length, float *baseband, sample_t *I, sample_t *Q, sample_t *carrier)
{
int s, ss;
double phasestep = demod->phasestep;
double rot = demod->rot;
double phase = demod->phase;
double gain = demod->gain;
double i, q;
@@ -103,13 +162,22 @@ void am_demodulate_complex(am_demod_t *demod, sample_t *amplitude, int length, f
for (s = 0, ss = 0; s < length; s++) {
i = baseband[ss++];
q = baseband[ss++];
_sin = sin(phase);
_cos = cos(phase);
phase += phasestep;
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
phase += rot;
if (fast_math) {
if (phase < 0.0)
phase += 65536.0;
else if (phase >= 65536.0)
phase -= 65536.0;
_sin = sin_tab[(uint16_t)phase];
_cos = cos_tab[(uint16_t)phase];
} else {
if (phase < 0.0)
phase += 2.0 * M_PI;
else if (phase >= 2.0 * M_PI)
phase -= 2.0 * M_PI;
_sin = sin(phase);
_cos = cos(phase);
}
I[s] = i * _cos - q * _sin;
Q[s] = i * _sin + q * _cos;
}

View File

@@ -1,7 +1,10 @@
#include "../libfilter/iir_filter.h"
int am_init(int fast_math);
void am_exit(void);
typedef struct am_mod {
double phasestep; /* angle to rotate vector per sample */
double rot; /* angle to rotate vector per sample */
double phase; /* current phase */
double gain; /* gain to be multiplied to amplitude */
double bias; /* DC offset to add (carrier amplitude) */
@@ -12,9 +15,8 @@ void am_mod_exit(am_mod_t *mod);
void am_modulate_complex(am_mod_t *mod, sample_t *amplitude, int num, float *baseband);
typedef struct am_demod {
double phasestep; /* angle to rotate vector per sample */
double rot; /* angle to rotate vector per sample */
double phase; /* current rotation phase (used to shift) */
double last_phase; /* last phase of FM (used to demodulate) */
iir_filter_t lp[3]; /* filters received IQ signal/carrier */
double gain; /* gain to be expected from amplitude */
double bias; /* DC offset to be expected (carrier amplitude) */