CMSIS-DSP: Modified spline f32 to compute coeffs at init time

- Moved coefficients computation to init function
- Improved documentation
pull/19/head
ClaudioMartino 6 years ago committed by Christophe Favergeon
parent 1c6bd2c3d6
commit 1d51b2ba00

@ -2357,39 +2357,44 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
*/
typedef struct
{
uint32_t n_x; /**< Number of known data points */
arm_spline_type type; /**< Type (boundary conditions) */
float32_t * buffer;
const float32_t * x; /**< x values */
const float32_t * y; /**< y values */
uint32_t n_x; /**< Number of known data points */
float32_t * coeffs; /**< Coefficients buffer (b,c, and d) */
} arm_spline_instance_f32;
/**
* @brief Processing function for the floating-point cubic spline interpolation.
* @param[in] S points to an instance of the floating-point spline structure.
* @param[in] x points to the x values of the known data points.
* @param[in] y points to the y values of the known data points.
* @param[in] xq points to the x values ot the interpolated data points.
* @param[out] pDst points to the block of output data.
* @param[in] blockSize number of samples of output data.
*/
void arm_spline_f32(
arm_spline_instance_f32 * S,
const float32_t * x,
const float32_t * y,
const float32_t * xq,
float32_t * pDst,
uint32_t blockSize);
arm_spline_instance_f32 * S,
const float32_t * xq,
float32_t * pDst,
uint32_t blockSize);
/**
* @brief Initialization function for the floating-point cubic spline interpolation.
* @param[in,out] S points to an instance of the floating-point spline structure.
* @param[in] n number of known data points.
* @param[in] type type of cubic spline interpolation (boundary conditions)
* @param[in] x points to the x values of the known data points.
* @param[in] y points to the y values of the known data points.
* @param[in] n number of known data points.
* @param[in] coeffs coefficients array for b, c, and d
* @param[in] tempBuffer buffer array for internal computations
*/
void arm_spline_init_f32(
arm_spline_instance_f32 * S,
uint32_t n,
arm_spline_type type,
float32_t * buffer);
arm_spline_instance_f32 * S,
arm_spline_type type,
const float32_t * x,
const float32_t * y,
uint32_t n,
float32_t * coeffs,
float32_t * tempBuffer);
/**
* @brief Instance structure for the floating-point matrix structure.

@ -28,20 +28,10 @@
#include "arm_math.h"
/*
Temporary fix because some arrays are defined on the stack.
They should be passed as additional arguments to the function.
*/
#define MAX_DATA_POINTS 40
/**
@ingroup groupSupport
*/
/**
@defgroup SplineInterpolate Cubic Spline Interpolation
@ -130,13 +120,8 @@ They should be passed as additional arguments to the function.
- b(i) = [y(i+1)-y(i)]/h(i)-h(i)*[c(i+1)+2*c(i)]/3
- d(i) = [c(i+1)-c(i)]/[3*h(i)]
Moreover, a(i)=y(i).
@par Usage
The x input array must be strictly sorted in ascending order and it must
not contain twice the same value (x(i)<x(i+1)).
@par
@par Behaviour outside the given intervals
It is possible to compute the interpolated vector for x values outside the
input range (xq<x(1); xq>x(n)). The coefficients used to compute the y values for
@ -149,45 +134,35 @@ They should be passed as additional arguments to the function.
@addtogroup SplineInterpolate
@{
*/
/**
* @brief Processing function for the floating-point cubic spline interpolation.
* @param[in] S points to an instance of the floating-point spline structure.
* @param[in] x points to the x values of the known data points.
* @param[in] y points to the y values of the known data points.
* @param[in] xq points to the x values ot the interpolated data points.
* @param[out] pDst points to the block of output data.
* @param[in] blockSize number of samples of output data.
*/
void arm_spline_f32(
arm_spline_instance_f32 * S,
const float32_t * x,
const float32_t * y,
const float32_t * xq,
float32_t * pDst,
uint32_t blockSize)
{
const float32_t * x = S->x;
const float32_t * y = S->y;
int32_t n = S->n_x;
arm_spline_type type = S->type;
float32_t hi, hm1;
float32_t * u = (S->buffer); /* (n-1)-long buffer for u elements */
float32_t * z = (S->buffer)+(n-1); /* n-long buffer for z elements */
float32_t * c = (S->buffer)+(n+n-1); /* n-long buffer for c elements */
float32_t Bi, li;
float32_t bi, di;
float32_t x_sc;
bi = 0.0f;
di = 0.0f;
/* Coefficients (a==y for i<=n-1) */
float32_t * b = (S->coeffs);
float32_t * c = (S->coeffs)+(n-1);
float32_t * d = (S->coeffs)+(2*(n-1));
const float32_t * pXq = xq;
int32_t blkCnt = (int32_t)blockSize;
int32_t blkCnt2;
int32_t i, j;
int32_t i;
float32_t x_sc;
#ifdef ARM_MATH_NEON
float32x4_t xiv;
@ -203,70 +178,16 @@ void arm_spline_f32(
float32x4_t yv;
#endif
/* === Solve LZ=B to obtain z(i) and u(i) === */
/* --- Row 1 --- */
/* B(0) = 0, not computed */
/* u(1,2) = a(1,2)/a(1,1) = a(1,2) */
if(type == ARM_SPLINE_NATURAL)
u[0] = 0; // a(1,2) = 0
else if(type == ARM_SPLINE_PARABOLIC_RUNOUT)
u[0] = -1; // a(1,2) = -1
z[0] = 0; // z(1) = B(1)/a(1,1) = 0
/* --- Rows 2 to N-1 (N=n+1) --- */
hm1 = x[1] - x[0]; // x(2)-x(1)
for (i=1; i<n-1; i++)
{
/* Compute B(i) */
hi = x[i+1]-x[i];
Bi = 3*(y[i+1]-y[i])/hi - 3*(y[i]-y[i-1])/hm1;
/* l(i) = a(ii)-a(i,i-1)*u(i-1) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */
li = 2*(hi+hm1) - hm1*u[i-1];
/* u(i) = a(i,i+1)/l(i) = h(i)/l(i) */
u[i] = hi/li;
/* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */
z[i] = (Bi-hm1*z[i-1])/li;
/* Update h(i-1) */
hm1 = hi;
}
/* --- Row N --- */
/* l(N) = a(N,N)-a(N,N-1)u(N-1) */
/* z(N) = [-a(N,N-1)z(N-1)]/l(N) */
if(type == ARM_SPLINE_NATURAL)
{
//li = 1; // a(N,N)=1; a(N,N-1)=0
z[n-1] = 0; // a(N,N-1)=0
}
else if(type == ARM_SPLINE_PARABOLIC_RUNOUT)
{
li = 1+u[n-2]; // a(N,N)=1; a(N,N-1)=-1
z[n-1] = z[n-2]/li; // a(N,N-1)=-1
}
/* === Solve UX = Z to obtain c(i) === */
/* c(N) = z(N) */
c[n-1] = z[n-1];
/* c(i) = z(i)-u(i+1)c(i+1) */
for (j = n-1-1; j >= 0; --j)
c[j] = z[j] - u[j] * c[j + 1];
/* === Compute b(i) and d(i) from c(i) and create output for x(i)<x<x(i+1) === */
/* Create output for x(i)<x<x(i+1) */
for (i=0; i<n-1; i++)
{
hi = x[i+1]-x[i];
bi = (y[i+1]-y[i])/hi-hi*(c[i+1]+2*c[i])/3;
di = (c[i+1]-c[i])/(3*hi);
#ifdef ARM_MATH_NEON
xiv = vdupq_n_f32(x[i]);
aiv = vdupq_n_f32(y[i]);
biv = vdupq_n_f32(bi);
biv = vdupq_n_f32(b[i]);
civ = vdupq_n_f32(c[i]);
div = vdupq_n_f32(di);
div = vdupq_n_f32(d[i]);
while( *(pXq+4) <= x[i+1] && blkCnt > 4 )
{
@ -300,14 +221,14 @@ void arm_spline_f32(
{
x_sc = *pXq++;
*pDst = y[i]+bi*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+di*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]);
*pDst = y[i]+b[i]*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+d[i]*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]);
pDst++;
blkCnt--;
}
}
/* == Create output for remaining samples (x>=x(n)) == */
/* Create output for remaining samples (x>=x(n)) */
#ifdef ARM_MATH_NEON
/* Compute 4 outputs at a time */
blkCnt2 = blkCnt >> 2;
@ -350,12 +271,11 @@ void arm_spline_f32(
{
x_sc = *pXq++;
*pDst = y[i-1]+bi*(x_sc-x[i-1])+c[i]*(x_sc-x[i-1])*(x_sc-x[i-1])+di*(x_sc-x[i-1])*(x_sc-x[i-1])*(x_sc-x[i-1]);
*pDst = y[i-1]+b[i-1]*(x_sc-x[i-1])+c[i-1]*(x_sc-x[i-1])*(x_sc-x[i-1])+d[i-1]*(x_sc-x[i-1])*(x_sc-x[i-1])*(x_sc-x[i-1]);
pDst++;
blkCnt2--;
}
}
/**

@ -32,33 +32,141 @@
@ingroup groupSupport
*/
/**
@addtogroup SplineInterpolate
@{
*/
@par Initialization function
The initialization function takes as input two arrays that the user has to allocate:
<code>coeffs</code> will contain the b, c, and d coefficients for the (n-1) intervals
(n is the number of known points), hence its size must be 3*(n-1); <code>tempBuffer</code>
is temporally used for internal computations and its size is n+n-1.
@par
The x input array must be strictly sorted in ascending order and it must
not contain twice the same value (x(i)<x(i+1)).
*/
/**
* @brief Initialization function for the floating-point cubic spline interpolation.
* @param[in,out] S points to an instance of the floating-point spline structure.
* @param[in] n number of known data points (must > 1).
* @param[in] type type of cubic spline interpolation (boundary conditions)
* @param[out] buffer Temporary buffer
*/
* @brief Initialization function for the floating-point cubic spline interpolation.
* @param[in,out] S points to an instance of the floating-point spline structure.
* @param[in] type type of cubic spline interpolation (boundary conditions)
* @param[in] x points to the x values of the known data points.
* @param[in] y points to the y values of the known data points.
* @param[in] n number of known data points.
* @param[in] coeffs coefficients array for b, c, and d
* @param[in] tempBuffer buffer array for internal computations
*
*/
void arm_spline_init_f32(
arm_spline_instance_f32 * S,
uint32_t n,
arm_spline_type type,
float32_t * buffer)
arm_spline_instance_f32 * S,
arm_spline_type type,
const float32_t * x,
const float32_t * y,
uint32_t n,
float32_t * coeffs,
float32_t * tempBuffer)
{
S->n_x = n;
S->type = type;
S->x = x;
S->y = y;
S->n_x = n;
/*** COEFFICIENTS COMPUTATION ***/
/* Type (boundary conditions):
* - Natural spline ( S1''(x1)=0 ; Sn''(xn)=0 )
* - Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) )
*/
S->buffer = buffer;
- Natural spline ( S1''(x1) = 0 ; Sn''(xn) = 0 )
- Parabolic runout spline ( S1''(x1) = S2''(x2) ; Sn-1''(xn-1) = Sn''(xn) ) */
/* (n-1)-long buffers for b, c, and d coefficients */
float32_t * b = coeffs;
float32_t * c = coeffs+(n-1);
float32_t * d = coeffs+(2*(n-1));
float32_t * u = tempBuffer; /* (n-1)-long scratch buffer for u elements */
float32_t * z = tempBuffer+(n-1); /* n-long scratch buffer for z elements */
float32_t hi, hm1; /* h(i) and h(i-1) */
float32_t Bi; /* B(i), i-th element of matrix B=LZ */
float32_t li; /* l(i), i-th element of matrix L */
float32_t cp1; /* Temporary value for c(i+1) */
int32_t i; /* Loop counter */
/* == Solve LZ=B to obtain z(i) and u(i) == */
/* -- Row 1 -- */
/* B(0) = 0, not computed */
/* u(1,2) = a(1,2)/a(1,1) = a(1,2) */
if(type == ARM_SPLINE_NATURAL)
u[0] = 0; /* a(1,2) = 0 */
else if(type == ARM_SPLINE_PARABOLIC_RUNOUT)
u[0] = -1; /* a(1,2) = -1 */
z[0] = 0; /* z(1) = B(1)/a(1,1) = 0 always */
/* -- Rows 2 to N-1 (N=n+1) -- */
hm1 = x[1] - x[0]; /* Initialize h(i-1) = h(1) = x(2)-x(1) */
for (i=1; i<(int32_t)n-1; i++)
{
/* Compute B(i) */
hi = x[i+1]-x[i];
Bi = 3*(y[i+1]-y[i])/hi - 3*(y[i]-y[i-1])/hm1;
/* l(i) = a(i)-a(i,i-1)*u(i-1) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */
li = 2*(hi+hm1) - hm1*u[i-1];
/* u(i) = a(i,i+1)/l(i) = h(i)/l(i) */
u[i] = hi/li;
/* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */
z[i] = (Bi-hm1*z[i-1])/li;
/* Update h(i-1) for next iteration */
hm1 = hi;
}
/* -- Row N -- */
/* l(N) = a(N,N)-a(N,N-1)u(N-1) */
/* z(N) = [-a(N,N-1)z(N-1)]/l(N) */
if(type == ARM_SPLINE_NATURAL)
{
/* li = 1; a(N,N) = 1; a(N,N-1) = 0 */
z[n-1] = 0; /* a(N,N-1) = 0 */
}
else if(type == ARM_SPLINE_PARABOLIC_RUNOUT)
{
li = 1+u[n-2]; /* a(N,N) = 1; a(N,N-1) = -1 */
z[n-1] = z[n-2]/li; /* a(N,N-1) = -1 */
}
/* == Solve UX = Z to obtain c(i) and */
/* compute b(i) and d(i) from c(i) == */
cp1 = z[n-1]; /* Initialize c(i+1) = c(N) = z(N) */
for (i=n-2; i>=0; i--)
{
/* c(i) = z(i)-u(i+1)c(i+1) */
c[i] = z[i]-u[i]*cp1;
hi = x[i+1]-x[i];
/* b(i) = [y(i+1)-y(i)]/h(i)-h(i)*[c(i+1)+2*c(i)]/3 */
b[i] = (y[i+1]-y[i])/hi-hi*(cp1+2*c[i])/3;
/* d(i) = [c(i+1)-c(i)]/[3*h(i)] */
d[i] = (cp1-c[i])/(3*hi);
/* Update c(i+1) for next iteration */
cp1 = c[i];
}
/* == Finally, store the coefficients in the instance == */
S->coeffs = coeffs;
}
/**

@ -15,7 +15,9 @@ class SupportTestsF32:public Client::Suite
Client::Pattern<float32_t> inputX;
Client::Pattern<float32_t> inputY;
Client::Pattern<float32_t> outputX;
Client::Pattern<float32_t> buffer;
Client::LocalPattern<float32_t> buffer;
Client::LocalPattern<float32_t> splineCoefs;
Client::LocalPattern<float32_t> output;
Client::LocalPattern<q15_t> outputQ15;

@ -427,13 +427,15 @@
const float32_t *inpY = inputY.ptr();
const float32_t *outX = outputX.ptr();
float32_t *outp = output.ptr();
float32_t *buf = buffer.ptr();
buf=(float32_t*)malloc((3*4-1)*sizeof(float32_t));
arm_spline_instance_f32 S;
float32_t *buf = buffer.ptr(); // ((2*4-1)*sizeof(float32_t))
float32_t *coef = splineCoefs.ptr(); // ((3*(4-1))*sizeof(float32_t))
arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT, buf);
arm_spline_f32(&S, inpX, inpY, outX, outp, 20);
arm_spline_instance_f32 S;
arm_spline_init_f32(&S, ARM_SPLINE_PARABOLIC_RUNOUT, inpX, inpY, 4, coef, buf);
arm_spline_f32(&S, outX, outp, 20);
ASSERT_EMPTY_TAIL(buffer);
ASSERT_EMPTY_TAIL(splineCoefs);
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
}
@ -444,13 +446,15 @@
const float32_t *inpY = inputY.ptr();
const float32_t *outX = outputX.ptr();
float32_t *outp = output.ptr();
float32_t *buf = buffer.ptr();
buf=(float32_t*)malloc((3*9-1)*sizeof(float32_t));
arm_spline_instance_f32 S;
float32_t *buf = buffer.ptr(); // ((2*9-1)*sizeof(float32_t))
float32_t *coef = splineCoefs.ptr(); // ((3*(9-1))*sizeof(float32_t))
arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL, buf);
arm_spline_f32(&S, inpX, inpY, outX, outp, 33);
arm_spline_instance_f32 S;
arm_spline_init_f32(&S, ARM_SPLINE_NATURAL, inpX, inpY, 9, coef, buf);
arm_spline_f32(&S, outX, outp, 33);
ASSERT_EMPTY_TAIL(buffer);
ASSERT_EMPTY_TAIL(splineCoefs);
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
}
@ -461,13 +465,15 @@
const float32_t *inpY = inputY.ptr();
const float32_t *outX = outputX.ptr();
float32_t *outp = output.ptr();
float32_t *buf = buffer.ptr();
buf=(float32_t*)malloc((3*3-1)*sizeof(float32_t));
arm_spline_instance_f32 S;
float32_t *buf = buffer.ptr(); // ((2*3-1)*sizeof(float32_t))
float32_t *coef = splineCoefs.ptr(); // ((3*(3-1))*sizeof(float32_t))
arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT, buf);
arm_spline_f32(&S, inpX, inpY, outX, outp, 30);
arm_spline_instance_f32 S;
arm_spline_init_f32(&S, ARM_SPLINE_PARABOLIC_RUNOUT, inpX, inpY, 3, coef, buf);
arm_spline_f32(&S, outX, outp, 30);
ASSERT_EMPTY_TAIL(buffer);
ASSERT_EMPTY_TAIL(splineCoefs);
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
}
@ -779,6 +785,8 @@
inputY.reload(SupportTestsF32::INPUT_SPLINE_SQU_Y_F32_ID,mgr,4);
outputX.reload(SupportTestsF32::OUTPUT_SPLINE_SQU_X_F32_ID,mgr,20);
ref.reload(SupportTestsF32::REF_SPLINE_SQU_F32_ID,mgr,20);
splineCoefs.create(3*(4-1),SupportTestsF32::COEFS_SPLINE_F32_ID,mgr);
buffer.create(2*4-1,SupportTestsF32::TEMP_SPLINE_F32_ID,mgr);
output.create(20,SupportTestsF32::OUT_F32_ID,mgr);
break;
@ -787,6 +795,8 @@
inputY.reload(SupportTestsF32::INPUT_SPLINE_SIN_Y_F32_ID,mgr,9);
outputX.reload(SupportTestsF32::OUTPUT_SPLINE_SIN_X_F32_ID,mgr,33);
ref.reload(SupportTestsF32::REF_SPLINE_SIN_F32_ID,mgr,33);
splineCoefs.create(3*(9-1),SupportTestsF32::COEFS_SPLINE_F32_ID,mgr);
buffer.create(2*9-1,SupportTestsF32::TEMP_SPLINE_F32_ID,mgr);
output.create(33,SupportTestsF32::OUT_F32_ID,mgr);
break;
@ -795,6 +805,8 @@
inputY.reload(SupportTestsF32::INPUT_SPLINE_RAM_Y_F32_ID,mgr,3);
outputX.reload(SupportTestsF32::OUTPUT_SPLINE_RAM_X_F32_ID,mgr,30);
ref.reload(SupportTestsF32::REF_SPLINE_RAM_F32_ID,mgr,30);
splineCoefs.create(3*(3-1),SupportTestsF32::COEFS_SPLINE_F32_ID,mgr);
buffer.create(2*3-1,SupportTestsF32::TEMP_SPLINE_F32_ID,mgr);
output.create(30,SupportTestsF32::OUT_F32_ID,mgr);
break;

@ -313,6 +313,8 @@ group Root {
Pattern SAMPLES_Q7_ID : Samples5_q7.txt
Output OUT_F32_ID : Output
Output COEFS_SPLINE_F32_ID : SplineCoefs
Output TEMP_SPLINE_F32_ID : SplineTemp
Functions {
test_weighted_sum_f32 nb=3:test_weighted_sum_f32

Loading…
Cancel
Save