From 1d51b2ba00d30c95fb73b8344377f3790c04b597 Mon Sep 17 00:00:00 2001 From: ClaudioMartino Date: Wed, 19 Feb 2020 12:22:10 +0100 Subject: [PATCH] CMSIS-DSP: Modified spline f32 to compute coeffs at init time - Moved coefficients computation to init function - Improved documentation --- Include/arm_math.h | 35 +++-- .../SupportFunctions/arm_spline_interp_f32.c | 112 ++------------ .../arm_spline_interp_init_f32.c | 144 +++++++++++++++--- Testing/Include/Tests/SupportTestsF32.h | 4 +- Testing/Source/Tests/SupportTestsF32.cpp | 42 +++-- Testing/desc.txt | 2 + 6 files changed, 194 insertions(+), 145 deletions(-) diff --git a/Include/arm_math.h b/Include/arm_math.h index 7c1da4a5..e2121f10 100644 --- a/Include/arm_math.h +++ b/Include/arm_math.h @@ -2357,39 +2357,44 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den) */ typedef struct { - uint32_t n_x; /**< Number of known data points */ arm_spline_type type; /**< Type (boundary conditions) */ - float32_t * buffer; + const float32_t * x; /**< x values */ + const float32_t * y; /**< y values */ + uint32_t n_x; /**< Number of known data points */ + float32_t * coeffs; /**< Coefficients buffer (b,c, and d) */ } arm_spline_instance_f32; /** * @brief Processing function for the floating-point cubic spline interpolation. * @param[in] S points to an instance of the floating-point spline structure. - * @param[in] x points to the x values of the known data points. - * @param[in] y points to the y values of the known data points. * @param[in] xq points to the x values ot the interpolated data points. * @param[out] pDst points to the block of output data. * @param[in] blockSize number of samples of output data. */ void arm_spline_f32( - arm_spline_instance_f32 * S, - const float32_t * x, - const float32_t * y, - const float32_t * xq, - float32_t * pDst, - uint32_t blockSize); + arm_spline_instance_f32 * S, + const float32_t * xq, + float32_t * pDst, + uint32_t blockSize); /** * @brief Initialization function for the floating-point cubic spline interpolation. * @param[in,out] S points to an instance of the floating-point spline structure. - * @param[in] n number of known data points. * @param[in] type type of cubic spline interpolation (boundary conditions) + * @param[in] x points to the x values of the known data points. + * @param[in] y points to the y values of the known data points. + * @param[in] n number of known data points. + * @param[in] coeffs coefficients array for b, c, and d + * @param[in] tempBuffer buffer array for internal computations */ void arm_spline_init_f32( - arm_spline_instance_f32 * S, - uint32_t n, - arm_spline_type type, - float32_t * buffer); + arm_spline_instance_f32 * S, + arm_spline_type type, + const float32_t * x, + const float32_t * y, + uint32_t n, + float32_t * coeffs, + float32_t * tempBuffer); /** * @brief Instance structure for the floating-point matrix structure. diff --git a/Source/SupportFunctions/arm_spline_interp_f32.c b/Source/SupportFunctions/arm_spline_interp_f32.c index b486f25a..0f829e80 100644 --- a/Source/SupportFunctions/arm_spline_interp_f32.c +++ b/Source/SupportFunctions/arm_spline_interp_f32.c @@ -28,20 +28,10 @@ #include "arm_math.h" -/* - -Temporary fix because some arrays are defined on the stack. -They should be passed as additional arguments to the function. - -*/ -#define MAX_DATA_POINTS 40 - /** @ingroup groupSupport */ - - /** @defgroup SplineInterpolate Cubic Spline Interpolation @@ -130,13 +120,8 @@ They should be passed as additional arguments to the function. - b(i) = [y(i+1)-y(i)]/h(i)-h(i)*[c(i+1)+2*c(i)]/3 - d(i) = [c(i+1)-c(i)]/[3*h(i)] Moreover, a(i)=y(i). - - @par Usage - The x input array must be strictly sorted in ascending order and it must - not contain twice the same value (x(i)x(n)). The coefficients used to compute the y values for @@ -149,45 +134,35 @@ They should be passed as additional arguments to the function. @addtogroup SplineInterpolate @{ */ + /** * @brief Processing function for the floating-point cubic spline interpolation. * @param[in] S points to an instance of the floating-point spline structure. - * @param[in] x points to the x values of the known data points. - * @param[in] y points to the y values of the known data points. * @param[in] xq points to the x values ot the interpolated data points. * @param[out] pDst points to the block of output data. * @param[in] blockSize number of samples of output data. */ - - void arm_spline_f32( arm_spline_instance_f32 * S, - const float32_t * x, - const float32_t * y, const float32_t * xq, float32_t * pDst, uint32_t blockSize) { + const float32_t * x = S->x; + const float32_t * y = S->y; int32_t n = S->n_x; - arm_spline_type type = S->type; - - float32_t hi, hm1; - float32_t * u = (S->buffer); /* (n-1)-long buffer for u elements */ - float32_t * z = (S->buffer)+(n-1); /* n-long buffer for z elements */ - float32_t * c = (S->buffer)+(n+n-1); /* n-long buffer for c elements */ - float32_t Bi, li; - float32_t bi, di; - float32_t x_sc; - bi = 0.0f; - di = 0.0f; + /* Coefficients (a==y for i<=n-1) */ + float32_t * b = (S->coeffs); + float32_t * c = (S->coeffs)+(n-1); + float32_t * d = (S->coeffs)+(2*(n-1)); const float32_t * pXq = xq; - int32_t blkCnt = (int32_t)blockSize; int32_t blkCnt2; - int32_t i, j; + int32_t i; + float32_t x_sc; #ifdef ARM_MATH_NEON float32x4_t xiv; @@ -203,70 +178,16 @@ void arm_spline_f32( float32x4_t yv; #endif - /* === Solve LZ=B to obtain z(i) and u(i) === */ - /* --- Row 1 --- */ - /* B(0) = 0, not computed */ - /* u(1,2) = a(1,2)/a(1,1) = a(1,2) */ - if(type == ARM_SPLINE_NATURAL) - u[0] = 0; // a(1,2) = 0 - else if(type == ARM_SPLINE_PARABOLIC_RUNOUT) - u[0] = -1; // a(1,2) = -1 - - z[0] = 0; // z(1) = B(1)/a(1,1) = 0 - - /* --- Rows 2 to N-1 (N=n+1) --- */ - hm1 = x[1] - x[0]; // x(2)-x(1) - - for (i=1; i= 0; --j) - c[j] = z[j] - u[j] * c[j + 1]; - - /* === Compute b(i) and d(i) from c(i) and create output for x(i) 4 ) { @@ -300,14 +221,14 @@ void arm_spline_f32( { x_sc = *pXq++; - *pDst = y[i]+bi*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+di*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]); + *pDst = y[i]+b[i]*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+d[i]*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]); pDst++; blkCnt--; } } - /* == Create output for remaining samples (x>=x(n)) == */ + /* Create output for remaining samples (x>=x(n)) */ #ifdef ARM_MATH_NEON /* Compute 4 outputs at a time */ blkCnt2 = blkCnt >> 2; @@ -350,12 +271,11 @@ void arm_spline_f32( { x_sc = *pXq++; - *pDst = y[i-1]+bi*(x_sc-x[i-1])+c[i]*(x_sc-x[i-1])*(x_sc-x[i-1])+di*(x_sc-x[i-1])*(x_sc-x[i-1])*(x_sc-x[i-1]); + *pDst = y[i-1]+b[i-1]*(x_sc-x[i-1])+c[i-1]*(x_sc-x[i-1])*(x_sc-x[i-1])+d[i-1]*(x_sc-x[i-1])*(x_sc-x[i-1])*(x_sc-x[i-1]); pDst++; blkCnt2--; } - } /** diff --git a/Source/SupportFunctions/arm_spline_interp_init_f32.c b/Source/SupportFunctions/arm_spline_interp_init_f32.c index ed7cc812..eeb1b9df 100644 --- a/Source/SupportFunctions/arm_spline_interp_init_f32.c +++ b/Source/SupportFunctions/arm_spline_interp_init_f32.c @@ -32,33 +32,141 @@ @ingroup groupSupport */ - /** @addtogroup SplineInterpolate @{ - */ + + @par Initialization function + + The initialization function takes as input two arrays that the user has to allocate: + coeffs will contain the b, c, and d coefficients for the (n-1) intervals + (n is the number of known points), hence its size must be 3*(n-1); tempBuffer + is temporally used for internal computations and its size is n+n-1. + + @par + + The x input array must be strictly sorted in ascending order and it must + not contain twice the same value (x(i) 1). - * @param[in] type type of cubic spline interpolation (boundary conditions) - * @param[out] buffer Temporary buffer - */ + * @brief Initialization function for the floating-point cubic spline interpolation. + * @param[in,out] S points to an instance of the floating-point spline structure. + * @param[in] type type of cubic spline interpolation (boundary conditions) + * @param[in] x points to the x values of the known data points. + * @param[in] y points to the y values of the known data points. + * @param[in] n number of known data points. + * @param[in] coeffs coefficients array for b, c, and d + * @param[in] tempBuffer buffer array for internal computations + * + */ void arm_spline_init_f32( - arm_spline_instance_f32 * S, - uint32_t n, - arm_spline_type type, - float32_t * buffer) + arm_spline_instance_f32 * S, + arm_spline_type type, + const float32_t * x, + const float32_t * y, + uint32_t n, + float32_t * coeffs, + float32_t * tempBuffer) { - S->n_x = n; - S->type = type; + S->x = x; + S->y = y; + S->n_x = n; + + /*** COEFFICIENTS COMPUTATION ***/ /* Type (boundary conditions): - * - Natural spline ( S1''(x1)=0 ; Sn''(xn)=0 ) - * - Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) ) - */ - S->buffer = buffer; + - Natural spline ( S1''(x1) = 0 ; Sn''(xn) = 0 ) + - Parabolic runout spline ( S1''(x1) = S2''(x2) ; Sn-1''(xn-1) = Sn''(xn) ) */ + + /* (n-1)-long buffers for b, c, and d coefficients */ + float32_t * b = coeffs; + float32_t * c = coeffs+(n-1); + float32_t * d = coeffs+(2*(n-1)); + + float32_t * u = tempBuffer; /* (n-1)-long scratch buffer for u elements */ + float32_t * z = tempBuffer+(n-1); /* n-long scratch buffer for z elements */ + + float32_t hi, hm1; /* h(i) and h(i-1) */ + float32_t Bi; /* B(i), i-th element of matrix B=LZ */ + float32_t li; /* l(i), i-th element of matrix L */ + float32_t cp1; /* Temporary value for c(i+1) */ + + int32_t i; /* Loop counter */ + + /* == Solve LZ=B to obtain z(i) and u(i) == */ + + /* -- Row 1 -- */ + /* B(0) = 0, not computed */ + /* u(1,2) = a(1,2)/a(1,1) = a(1,2) */ + if(type == ARM_SPLINE_NATURAL) + u[0] = 0; /* a(1,2) = 0 */ + else if(type == ARM_SPLINE_PARABOLIC_RUNOUT) + u[0] = -1; /* a(1,2) = -1 */ + + z[0] = 0; /* z(1) = B(1)/a(1,1) = 0 always */ + + /* -- Rows 2 to N-1 (N=n+1) -- */ + hm1 = x[1] - x[0]; /* Initialize h(i-1) = h(1) = x(2)-x(1) */ + + for (i=1; i<(int32_t)n-1; i++) + { + /* Compute B(i) */ + hi = x[i+1]-x[i]; + Bi = 3*(y[i+1]-y[i])/hi - 3*(y[i]-y[i-1])/hm1; + + /* l(i) = a(i)-a(i,i-1)*u(i-1) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */ + li = 2*(hi+hm1) - hm1*u[i-1]; + + /* u(i) = a(i,i+1)/l(i) = h(i)/l(i) */ + u[i] = hi/li; + + /* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */ + z[i] = (Bi-hm1*z[i-1])/li; + + /* Update h(i-1) for next iteration */ + hm1 = hi; + } + + /* -- Row N -- */ + /* l(N) = a(N,N)-a(N,N-1)u(N-1) */ + /* z(N) = [-a(N,N-1)z(N-1)]/l(N) */ + if(type == ARM_SPLINE_NATURAL) + { + /* li = 1; a(N,N) = 1; a(N,N-1) = 0 */ + z[n-1] = 0; /* a(N,N-1) = 0 */ + } + else if(type == ARM_SPLINE_PARABOLIC_RUNOUT) + { + li = 1+u[n-2]; /* a(N,N) = 1; a(N,N-1) = -1 */ + z[n-1] = z[n-2]/li; /* a(N,N-1) = -1 */ + } + + /* == Solve UX = Z to obtain c(i) and */ + /* compute b(i) and d(i) from c(i) == */ + + cp1 = z[n-1]; /* Initialize c(i+1) = c(N) = z(N) */ + + for (i=n-2; i>=0; i--) + { + /* c(i) = z(i)-u(i+1)c(i+1) */ + c[i] = z[i]-u[i]*cp1; + + hi = x[i+1]-x[i]; + /* b(i) = [y(i+1)-y(i)]/h(i)-h(i)*[c(i+1)+2*c(i)]/3 */ + b[i] = (y[i+1]-y[i])/hi-hi*(cp1+2*c[i])/3; + + /* d(i) = [c(i+1)-c(i)]/[3*h(i)] */ + d[i] = (cp1-c[i])/(3*hi); + + /* Update c(i+1) for next iteration */ + cp1 = c[i]; + } + + /* == Finally, store the coefficients in the instance == */ + + S->coeffs = coeffs; } /** diff --git a/Testing/Include/Tests/SupportTestsF32.h b/Testing/Include/Tests/SupportTestsF32.h index 0fe693cd..4bba8e73 100755 --- a/Testing/Include/Tests/SupportTestsF32.h +++ b/Testing/Include/Tests/SupportTestsF32.h @@ -15,7 +15,9 @@ class SupportTestsF32:public Client::Suite Client::Pattern inputX; Client::Pattern inputY; Client::Pattern outputX; - Client::Pattern buffer; + + Client::LocalPattern buffer; + Client::LocalPattern splineCoefs; Client::LocalPattern output; Client::LocalPattern outputQ15; diff --git a/Testing/Source/Tests/SupportTestsF32.cpp b/Testing/Source/Tests/SupportTestsF32.cpp index 25ddd6fb..6c347276 100755 --- a/Testing/Source/Tests/SupportTestsF32.cpp +++ b/Testing/Source/Tests/SupportTestsF32.cpp @@ -427,13 +427,15 @@ const float32_t *inpY = inputY.ptr(); const float32_t *outX = outputX.ptr(); float32_t *outp = output.ptr(); - float32_t *buf = buffer.ptr(); - buf=(float32_t*)malloc((3*4-1)*sizeof(float32_t)); - arm_spline_instance_f32 S; + float32_t *buf = buffer.ptr(); // ((2*4-1)*sizeof(float32_t)) + float32_t *coef = splineCoefs.ptr(); // ((3*(4-1))*sizeof(float32_t)) - arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT, buf); - arm_spline_f32(&S, inpX, inpY, outX, outp, 20); + arm_spline_instance_f32 S; + arm_spline_init_f32(&S, ARM_SPLINE_PARABOLIC_RUNOUT, inpX, inpY, 4, coef, buf); + arm_spline_f32(&S, outX, outp, 20); + ASSERT_EMPTY_TAIL(buffer); + ASSERT_EMPTY_TAIL(splineCoefs); ASSERT_EMPTY_TAIL(output); ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD); } @@ -444,13 +446,15 @@ const float32_t *inpY = inputY.ptr(); const float32_t *outX = outputX.ptr(); float32_t *outp = output.ptr(); - float32_t *buf = buffer.ptr(); - buf=(float32_t*)malloc((3*9-1)*sizeof(float32_t)); - arm_spline_instance_f32 S; + float32_t *buf = buffer.ptr(); // ((2*9-1)*sizeof(float32_t)) + float32_t *coef = splineCoefs.ptr(); // ((3*(9-1))*sizeof(float32_t)) - arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL, buf); - arm_spline_f32(&S, inpX, inpY, outX, outp, 33); + arm_spline_instance_f32 S; + arm_spline_init_f32(&S, ARM_SPLINE_NATURAL, inpX, inpY, 9, coef, buf); + arm_spline_f32(&S, outX, outp, 33); + ASSERT_EMPTY_TAIL(buffer); + ASSERT_EMPTY_TAIL(splineCoefs); ASSERT_EMPTY_TAIL(output); ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD); } @@ -461,13 +465,15 @@ const float32_t *inpY = inputY.ptr(); const float32_t *outX = outputX.ptr(); float32_t *outp = output.ptr(); - float32_t *buf = buffer.ptr(); - buf=(float32_t*)malloc((3*3-1)*sizeof(float32_t)); - arm_spline_instance_f32 S; + float32_t *buf = buffer.ptr(); // ((2*3-1)*sizeof(float32_t)) + float32_t *coef = splineCoefs.ptr(); // ((3*(3-1))*sizeof(float32_t)) - arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT, buf); - arm_spline_f32(&S, inpX, inpY, outX, outp, 30); + arm_spline_instance_f32 S; + arm_spline_init_f32(&S, ARM_SPLINE_PARABOLIC_RUNOUT, inpX, inpY, 3, coef, buf); + arm_spline_f32(&S, outX, outp, 30); + ASSERT_EMPTY_TAIL(buffer); + ASSERT_EMPTY_TAIL(splineCoefs); ASSERT_EMPTY_TAIL(output); ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD); } @@ -779,6 +785,8 @@ inputY.reload(SupportTestsF32::INPUT_SPLINE_SQU_Y_F32_ID,mgr,4); outputX.reload(SupportTestsF32::OUTPUT_SPLINE_SQU_X_F32_ID,mgr,20); ref.reload(SupportTestsF32::REF_SPLINE_SQU_F32_ID,mgr,20); + splineCoefs.create(3*(4-1),SupportTestsF32::COEFS_SPLINE_F32_ID,mgr); + buffer.create(2*4-1,SupportTestsF32::TEMP_SPLINE_F32_ID,mgr); output.create(20,SupportTestsF32::OUT_F32_ID,mgr); break; @@ -787,6 +795,8 @@ inputY.reload(SupportTestsF32::INPUT_SPLINE_SIN_Y_F32_ID,mgr,9); outputX.reload(SupportTestsF32::OUTPUT_SPLINE_SIN_X_F32_ID,mgr,33); ref.reload(SupportTestsF32::REF_SPLINE_SIN_F32_ID,mgr,33); + splineCoefs.create(3*(9-1),SupportTestsF32::COEFS_SPLINE_F32_ID,mgr); + buffer.create(2*9-1,SupportTestsF32::TEMP_SPLINE_F32_ID,mgr); output.create(33,SupportTestsF32::OUT_F32_ID,mgr); break; @@ -795,6 +805,8 @@ inputY.reload(SupportTestsF32::INPUT_SPLINE_RAM_Y_F32_ID,mgr,3); outputX.reload(SupportTestsF32::OUTPUT_SPLINE_RAM_X_F32_ID,mgr,30); ref.reload(SupportTestsF32::REF_SPLINE_RAM_F32_ID,mgr,30); + splineCoefs.create(3*(3-1),SupportTestsF32::COEFS_SPLINE_F32_ID,mgr); + buffer.create(2*3-1,SupportTestsF32::TEMP_SPLINE_F32_ID,mgr); output.create(30,SupportTestsF32::OUT_F32_ID,mgr); break; diff --git a/Testing/desc.txt b/Testing/desc.txt index 1fd36029..3bec16f0 100644 --- a/Testing/desc.txt +++ b/Testing/desc.txt @@ -313,6 +313,8 @@ group Root { Pattern SAMPLES_Q7_ID : Samples5_q7.txt Output OUT_F32_ID : Output + Output COEFS_SPLINE_F32_ID : SplineCoefs + Output TEMP_SPLINE_F32_ID : SplineTemp Functions { test_weighted_sum_f32 nb=3:test_weighted_sum_f32