CMSIS-DSP: Modified spline f32 to compute coeffs at init time

- Moved coefficients computation to init function - Improved documentation
6 years ago · 1d51b2ba00
parent 1c6bd2c3d6
commit 1d51b2ba00
6 changed files with 194 additions and 145 deletions
--- a/Include/arm_math.h
+++ b/Include/arm_math.h
@ -2357,39 +2357,44 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
   */
  typedef struct
  {
-    uint32_t n_x;              /**< Number of known data points */
    arm_spline_type type;      /**< Type (boundary conditions) */
-    float32_t * buffer;
+    const float32_t * x;       /**< x values */
+    const float32_t * y;       /**< y values */
+    uint32_t n_x;              /**< Number of known data points */
+    float32_t * coeffs;        /**< Coefficients buffer (b,c, and d) */
  } arm_spline_instance_f32;

  /**
   * @brief Processing function for the floating-point cubic spline interpolation.
   * @param[in]  S          points to an instance of the floating-point spline structure.
-   * @param[in]  x          points to the x values of the known data points.
-   * @param[in]  y          points to the y values of the known data points.
   * @param[in]  xq         points to the x values ot the interpolated data points.
   * @param[out] pDst       points to the block of output data.
   * @param[in]  blockSize  number of samples of output data.
   */
  void arm_spline_f32(
-          arm_spline_instance_f32 * S,
-    const float32_t * x,
-    const float32_t * y,
-    const float32_t * xq,
-          float32_t * pDst,
-          uint32_t blockSize);
+        arm_spline_instance_f32 * S, 
+  const float32_t * xq,
+        float32_t * pDst,
+        uint32_t blockSize);

  /**
   * @brief Initialization function for the floating-point cubic spline interpolation.
   * @param[in,out] S        points to an instance of the floating-point spline structure.
-   * @param[in]     n        number of known data points.
   * @param[in]     type     type of cubic spline interpolation (boundary conditions)
+   * @param[in]     x        points to the x values of the known data points.
+   * @param[in]     y        points to the y values of the known data points.
+   * @param[in]     n        number of known data points.
+   * @param[in]     coeffs   coefficients array for b, c, and d
+   * @param[in]     tempBuffer   buffer array for internal computations
   */
  void arm_spline_init_f32(
-    arm_spline_instance_f32 * S,
-    uint32_t n,
-    arm_spline_type type,
-    float32_t * buffer);
+          arm_spline_instance_f32 * S,
+          arm_spline_type type,
+    const float32_t * x,
+    const float32_t * y,
+          uint32_t n, 
+          float32_t * coeffs,
+          float32_t * tempBuffer);

  /**
   * @brief Instance structure for the floating-point matrix structure.
--- a/Source/SupportFunctions/arm_spline_interp_f32.c
+++ b/Source/SupportFunctions/arm_spline_interp_f32.c
@ -28,20 +28,10 @@

 #include "arm_math.h"

-/* 
-
-Temporary fix because some arrays are defined on the stack.
-They should be passed as additional arguments to the function.
-
-*/
-#define MAX_DATA_POINTS 40
-
 /**
  @ingroup groupSupport
 */

-
-
 /**
  @defgroup SplineInterpolate Cubic Spline Interpolation
 
@ -130,13 +120,8 @@ They should be passed as additional arguments to the function.
  - b(i) = [y(i+1)-y(i)]/h(i)-h(i)*[c(i+1)+2*c(i)]/3 
  - d(i) = [c(i+1)-c(i)]/[3*h(i)] 
  Moreover, a(i)=y(i).
-  
-  @par Usage

-  The x input array must be strictly sorted in ascending order and it must
-  not contain twice the same value (x(i)<x(i+1)).
- 
-  @par
+ @par Behaviour outside the given intervals

  It is possible to compute the interpolated vector for x values outside the 
  input range (xq<x(1); xq>x(n)). The coefficients used to compute the y values for
@ -149,45 +134,35 @@ They should be passed as additional arguments to the function.
  @addtogroup SplineInterpolate
  @{
 */
+
 /**
 * @brief Processing function for the floating-point cubic spline interpolation.
 * @param[in]  S          points to an instance of the floating-point spline structure.
- * @param[in]  x          points to the x values of the known data points.
- * @param[in]  y          points to the y values of the known data points.
 * @param[in]  xq         points to the x values ot the interpolated data points.
 * @param[out] pDst       points to the block of output data.
 * @param[in]  blockSize  number of samples of output data.
 */

-
-
 void arm_spline_f32(
        arm_spline_instance_f32 * S, 
-  const float32_t * x,
-  const float32_t * y,
  const float32_t * xq,
        float32_t * pDst,
        uint32_t blockSize)
 {
+    const float32_t * x = S->x;
+    const float32_t * y = S->y;
    int32_t n = S->n_x;
-    arm_spline_type type = S->type;
-
-    float32_t hi, hm1;
-    float32_t * u = (S->buffer);     /* (n-1)-long buffer for u elements */
-    float32_t * z = (S->buffer)+(n-1);   /* n-long buffer for z elements */
-    float32_t * c = (S->buffer)+(n+n-1); /* n-long buffer for c elements */
-    float32_t Bi, li;
-    float32_t bi, di;
-    float32_t x_sc;

-    bi = 0.0f;
-    di = 0.0f;
+    /* Coefficients (a==y for i<=n-1) */
+    float32_t * b = (S->coeffs);
+    float32_t * c = (S->coeffs)+(n-1);
+    float32_t * d = (S->coeffs)+(2*(n-1));    

    const float32_t * pXq = xq;
-
    int32_t blkCnt = (int32_t)blockSize;
    int32_t blkCnt2;
-    int32_t i, j;
+    int32_t i;
+    float32_t x_sc;

 #ifdef ARM_MATH_NEON
    float32x4_t xiv;
@ -203,70 +178,16 @@ void arm_spline_f32(
    float32x4_t yv;
 #endif

-    /* === Solve LZ=B to obtain z(i) and u(i) === */
-    /* --- Row 1 --- */
-    /* B(0) = 0, not computed */
-    /* u(1,2) = a(1,2)/a(1,1) = a(1,2) */
-    if(type == ARM_SPLINE_NATURAL)
-        u[0] = 0;  // a(1,2) = 0
-    else if(type == ARM_SPLINE_PARABOLIC_RUNOUT)
-        u[0] = -1; // a(1,2) = -1
-
-    z[0] = 0;  // z(1) = B(1)/a(1,1) = 0
-
-    /* --- Rows 2 to N-1 (N=n+1) --- */
-    hm1 = x[1] - x[0]; // x(2)-x(1)
-
-    for (i=1; i<n-1; i++)
-    {
-        /* Compute B(i) */
-        hi = x[i+1]-x[i];
-        Bi = 3*(y[i+1]-y[i])/hi - 3*(y[i]-y[i-1])/hm1;
-        /* l(i) = a(ii)-a(i,i-1)*u(i-1) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */
-        li = 2*(hi+hm1) - hm1*u[i-1];
-        /* u(i) = a(i,i+1)/l(i) = h(i)/l(i) */
-        u[i] = hi/li;
-        /* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */
-        z[i] = (Bi-hm1*z[i-1])/li;
-        /* Update h(i-1) */
-        hm1 = hi;
-    }
-
-    /* --- Row N --- */
-    /* l(N) = a(N,N)-a(N,N-1)u(N-1) */
-    /* z(N) = [-a(N,N-1)z(N-1)]/l(N) */
-    if(type == ARM_SPLINE_NATURAL)
-    {
-        //li = 1;   // a(N,N)=1; a(N,N-1)=0
-        z[n-1] = 0; // a(N,N-1)=0
-    }
-    else if(type == ARM_SPLINE_PARABOLIC_RUNOUT)
-    {
-        li = 1+u[n-2];      // a(N,N)=1; a(N,N-1)=-1
-        z[n-1] = z[n-2]/li; // a(N,N-1)=-1
-    }
-
-    /* === Solve UX = Z to obtain c(i) === */
-    /* c(N) = z(N) */
-    c[n-1] = z[n-1]; 
-    /* c(i) = z(i)-u(i+1)c(i+1) */
-    for (j = n-1-1; j >= 0; --j) 
-        c[j] = z[j] - u[j] * c[j + 1];
-
-    /* === Compute b(i) and d(i) from c(i) and create output for x(i)<x<x(i+1) === */
+    /* Create output for x(i)<x<x(i+1) */
    for (i=0; i<n-1; i++)
    {
-        hi = x[i+1]-x[i];
-        bi = (y[i+1]-y[i])/hi-hi*(c[i+1]+2*c[i])/3;
-        di = (c[i+1]-c[i])/(3*hi);
-
 #ifdef ARM_MATH_NEON
        xiv = vdupq_n_f32(x[i]);

        aiv = vdupq_n_f32(y[i]);
-        biv = vdupq_n_f32(bi);
+        biv = vdupq_n_f32(b[i]);
        civ = vdupq_n_f32(c[i]);
-        div = vdupq_n_f32(di);
+        div = vdupq_n_f32(d[i]);

        while( *(pXq+4) <= x[i+1] && blkCnt > 4 )
        {
@ -300,14 +221,14 @@ void arm_spline_f32(
        {
            x_sc = *pXq++;

-            *pDst = y[i]+bi*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+di*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]);
+            *pDst = y[i]+b[i]*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+d[i]*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]);

            pDst++;
            blkCnt--;
        }
    }

-    /* == Create output for remaining samples (x>=x(n)) == */
+    /* Create output for remaining samples (x>=x(n)) */
 #ifdef ARM_MATH_NEON
    /* Compute 4 outputs at a time */
    blkCnt2 = blkCnt >> 2;
@ -350,12 +271,11 @@ void arm_spline_f32(
    { 
        x_sc = *pXq++; 
  
-        *pDst = y[i-1]+bi*(x_sc-x[i-1])+c[i]*(x_sc-x[i-1])*(x_sc-x[i-1])+di*(x_sc-x[i-1])*(x_sc-x[i-1])*(x_sc-x[i-1]);
+        *pDst = y[i-1]+b[i-1]*(x_sc-x[i-1])+c[i-1]*(x_sc-x[i-1])*(x_sc-x[i-1])+d[i-1]*(x_sc-x[i-1])*(x_sc-x[i-1])*(x_sc-x[i-1]);
 
        pDst++; 
        blkCnt2--;   
    }   
-                                                       
 }

 /**
--- a/Source/SupportFunctions/arm_spline_interp_init_f32.c
+++ b/Source/SupportFunctions/arm_spline_interp_init_f32.c
@ -32,33 +32,141 @@
  @ingroup groupSupport
 */

-
 /**
  @addtogroup SplineInterpolate
  @{
- */
+
+  @par Initialization function
+
+  The initialization function takes as input two arrays that the user has to allocate:
+  <code>coeffs</code> will contain the b, c, and d coefficients for the (n-1) intervals 
+  (n is the number of known points), hence its size must be 3*(n-1); <code>tempBuffer</code>
+  is temporally used for internal computations and its size is n+n-1.
+
+  @par
+
+  The x input array must be strictly sorted in ascending order and it must
+  not contain twice the same value (x(i)<x(i+1)).
+ 
+*/

 /**
-  * @brief Initialization function for the floating-point cubic spline interpolation.
-  * @param[in,out] S        points to an instance of the floating-point spline structure.
-  * @param[in]     n        number of known data points (must > 1).
-  * @param[in]     type     type of cubic spline interpolation (boundary conditions)
-  * @param[out]    buffer   Temporary buffer
-  */
+ * @brief Initialization function for the floating-point cubic spline interpolation.
+ * @param[in,out] S           points to an instance of the floating-point spline structure.
+ * @param[in]     type        type of cubic spline interpolation (boundary conditions)
+ * @param[in]     x           points to the x values of the known data points.
+ * @param[in]     y           points to the y values of the known data points.
+ * @param[in]     n           number of known data points.
+ * @param[in]     coeffs      coefficients array for b, c, and d
+ * @param[in]     tempBuffer  buffer array for internal computations
+ *
+ */

 void arm_spline_init_f32(
-    arm_spline_instance_f32 * S, 
-    uint32_t n, 
-    arm_spline_type type,
-    float32_t * buffer)
+        arm_spline_instance_f32 * S,
+        arm_spline_type type,
+  const float32_t * x,
+  const float32_t * y,
+        uint32_t n, 
+        float32_t * coeffs,
+        float32_t * tempBuffer)
 {
-    S->n_x  = n;
-    S->type = type;
+    S->x = x;
+    S->y = y;
+    S->n_x = n;
+
+    /*** COEFFICIENTS COMPUTATION ***/
    /* Type (boundary conditions):
-     *  - Natural spline          ( S1''(x1)=0 ; Sn''(xn)=0 )
-     *  - Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) )
-     */
-    S->buffer = buffer;
+        - Natural spline          ( S1''(x1) = 0 ; Sn''(xn) = 0 )
+        - Parabolic runout spline ( S1''(x1) = S2''(x2) ; Sn-1''(xn-1) = Sn''(xn) ) */
+    
+    /* (n-1)-long buffers for b, c, and d coefficients */
+    float32_t * b = coeffs;
+    float32_t * c = coeffs+(n-1);
+    float32_t * d = coeffs+(2*(n-1));    
+
+    float32_t * u = tempBuffer;   /* (n-1)-long scratch buffer for u elements */
+    float32_t * z = tempBuffer+(n-1); /* n-long scratch buffer for z elements */
+
+    float32_t hi, hm1; /* h(i) and h(i-1) */
+    float32_t Bi; /* B(i), i-th element of matrix B=LZ */
+    float32_t li; /* l(i), i-th element of matrix L    */
+    float32_t cp1; /* Temporary value for c(i+1) */
+
+    int32_t i; /* Loop counter */
+
+    /* == Solve LZ=B to obtain z(i) and u(i) == */
+
+    /* -- Row 1 -- */
+    /* B(0) = 0, not computed */
+    /* u(1,2) = a(1,2)/a(1,1) = a(1,2) */
+    if(type == ARM_SPLINE_NATURAL)
+        u[0] = 0;  /* a(1,2) = 0 */
+    else if(type == ARM_SPLINE_PARABOLIC_RUNOUT)
+        u[0] = -1; /* a(1,2) = -1 */
+
+    z[0] = 0;  /* z(1) = B(1)/a(1,1) = 0 always */
+
+    /* -- Rows 2 to N-1 (N=n+1) -- */
+    hm1 = x[1] - x[0]; /* Initialize h(i-1) = h(1) = x(2)-x(1) */
+
+    for (i=1; i<(int32_t)n-1; i++)
+    {
+        /* Compute B(i) */
+        hi = x[i+1]-x[i];
+        Bi = 3*(y[i+1]-y[i])/hi - 3*(y[i]-y[i-1])/hm1;
+
+        /* l(i) = a(i)-a(i,i-1)*u(i-1) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */
+        li = 2*(hi+hm1) - hm1*u[i-1];
+
+        /* u(i) = a(i,i+1)/l(i) = h(i)/l(i) */
+        u[i] = hi/li;
+
+        /* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */
+        z[i] = (Bi-hm1*z[i-1])/li;
+
+        /* Update h(i-1) for next iteration */
+        hm1 = hi;
+    }
+
+    /* -- Row N -- */
+    /* l(N) = a(N,N)-a(N,N-1)u(N-1) */
+    /* z(N) = [-a(N,N-1)z(N-1)]/l(N) */
+    if(type == ARM_SPLINE_NATURAL)
+    {
+        /* li = 1;     a(N,N) = 1; a(N,N-1) = 0 */
+        z[n-1] = 0; /* a(N,N-1) = 0 */
+    }
+    else if(type == ARM_SPLINE_PARABOLIC_RUNOUT)
+    {
+        li = 1+u[n-2];      /* a(N,N) = 1; a(N,N-1) = -1 */
+        z[n-1] = z[n-2]/li; /* a(N,N-1) = -1 */
+    }
+
+    /* == Solve UX = Z to obtain c(i) and    */
+    /*    compute b(i) and d(i) from c(i) == */
+
+    cp1 = z[n-1]; /* Initialize c(i+1) = c(N) = z(N) */
+
+    for (i=n-2; i>=0; i--) 
+    {
+        /* c(i) = z(i)-u(i+1)c(i+1) */
+        c[i] = z[i]-u[i]*cp1;
+
+        hi = x[i+1]-x[i];
+        /* b(i) = [y(i+1)-y(i)]/h(i)-h(i)*[c(i+1)+2*c(i)]/3 */
+        b[i] = (y[i+1]-y[i])/hi-hi*(cp1+2*c[i])/3;
+
+        /* d(i) = [c(i+1)-c(i)]/[3*h(i)] */
+        d[i] = (cp1-c[i])/(3*hi);
+
+        /* Update c(i+1) for next iteration */
+        cp1 = c[i];
+    }
+
+    /* == Finally, store the coefficients in the instance == */
+
+    S->coeffs = coeffs;
 }

 /**
--- a/Testing/Include/Tests/SupportTestsF32.h
+++ b/Testing/Include/Tests/SupportTestsF32.h
@ -15,7 +15,9 @@ class SupportTestsF32:public Client::Suite
            Client::Pattern<float32_t> inputX;
            Client::Pattern<float32_t> inputY;
            Client::Pattern<float32_t> outputX;
-            Client::Pattern<float32_t> buffer;
+
+            Client::LocalPattern<float32_t> buffer;
+            Client::LocalPattern<float32_t> splineCoefs;

            Client::LocalPattern<float32_t> output;
            Client::LocalPattern<q15_t> outputQ15;
--- a/Testing/Source/Tests/SupportTestsF32.cpp
+++ b/Testing/Source/Tests/SupportTestsF32.cpp
@ -427,13 +427,15 @@
       const float32_t *inpY = inputY.ptr();
       const float32_t *outX = outputX.ptr();
       float32_t *outp = output.ptr();
-       float32_t *buf = buffer.ptr();
-       buf=(float32_t*)malloc((3*4-1)*sizeof(float32_t));
-       arm_spline_instance_f32 S;
+       float32_t *buf = buffer.ptr();       // ((2*4-1)*sizeof(float32_t))
+       float32_t *coef = splineCoefs.ptr(); // ((3*(4-1))*sizeof(float32_t))

-       arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT, buf);
-       arm_spline_f32(&S, inpX, inpY, outX, outp, 20);
+       arm_spline_instance_f32 S;
+       arm_spline_init_f32(&S, ARM_SPLINE_PARABOLIC_RUNOUT, inpX, inpY, 4, coef, buf);
+       arm_spline_f32(&S, outX, outp, 20);

+       ASSERT_EMPTY_TAIL(buffer);
+       ASSERT_EMPTY_TAIL(splineCoefs);
       ASSERT_EMPTY_TAIL(output);
       ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
    } 
@ -444,13 +446,15 @@
       const float32_t *inpY = inputY.ptr();
       const float32_t *outX = outputX.ptr();
       float32_t *outp = output.ptr();
-       float32_t *buf = buffer.ptr();
-       buf=(float32_t*)malloc((3*9-1)*sizeof(float32_t));
-       arm_spline_instance_f32 S;
+       float32_t *buf = buffer.ptr(); // ((2*9-1)*sizeof(float32_t))
+       float32_t *coef = splineCoefs.ptr(); // ((3*(9-1))*sizeof(float32_t))

-       arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL, buf);
-       arm_spline_f32(&S, inpX, inpY, outX, outp, 33);
+       arm_spline_instance_f32 S;
+       arm_spline_init_f32(&S, ARM_SPLINE_NATURAL, inpX, inpY, 9, coef, buf);
+       arm_spline_f32(&S, outX, outp, 33);

+       ASSERT_EMPTY_TAIL(buffer);
+       ASSERT_EMPTY_TAIL(splineCoefs);
       ASSERT_EMPTY_TAIL(output);
       ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
    } 
@ -461,13 +465,15 @@
       const float32_t *inpY = inputY.ptr();
       const float32_t *outX = outputX.ptr();
       float32_t *outp = output.ptr();
-       float32_t *buf = buffer.ptr();
-       buf=(float32_t*)malloc((3*3-1)*sizeof(float32_t));
-       arm_spline_instance_f32 S;
+       float32_t *buf = buffer.ptr(); // ((2*3-1)*sizeof(float32_t))
+       float32_t *coef = splineCoefs.ptr(); // ((3*(3-1))*sizeof(float32_t))

-       arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT, buf);
-       arm_spline_f32(&S, inpX, inpY, outX, outp, 30);
+       arm_spline_instance_f32 S;
+       arm_spline_init_f32(&S, ARM_SPLINE_PARABOLIC_RUNOUT, inpX, inpY, 3, coef, buf);
+       arm_spline_f32(&S, outX, outp, 30);

+       ASSERT_EMPTY_TAIL(buffer);
+       ASSERT_EMPTY_TAIL(splineCoefs);
       ASSERT_EMPTY_TAIL(output);
       ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
    } 
@ -779,6 +785,8 @@
 	      inputY.reload(SupportTestsF32::INPUT_SPLINE_SQU_Y_F32_ID,mgr,4);
 	      outputX.reload(SupportTestsF32::OUTPUT_SPLINE_SQU_X_F32_ID,mgr,20);
 	      ref.reload(SupportTestsF32::REF_SPLINE_SQU_F32_ID,mgr,20);
+          splineCoefs.create(3*(4-1),SupportTestsF32::COEFS_SPLINE_F32_ID,mgr);
+          buffer.create(2*4-1,SupportTestsF32::TEMP_SPLINE_F32_ID,mgr);
 	      output.create(20,SupportTestsF32::OUT_F32_ID,mgr);
 	    break;

@ -787,6 +795,8 @@
 	      inputY.reload(SupportTestsF32::INPUT_SPLINE_SIN_Y_F32_ID,mgr,9);
 	      outputX.reload(SupportTestsF32::OUTPUT_SPLINE_SIN_X_F32_ID,mgr,33);
 	      ref.reload(SupportTestsF32::REF_SPLINE_SIN_F32_ID,mgr,33);
+          splineCoefs.create(3*(9-1),SupportTestsF32::COEFS_SPLINE_F32_ID,mgr);
+          buffer.create(2*9-1,SupportTestsF32::TEMP_SPLINE_F32_ID,mgr);
 	      output.create(33,SupportTestsF32::OUT_F32_ID,mgr);
 	    break;

@ -795,6 +805,8 @@
 	      inputY.reload(SupportTestsF32::INPUT_SPLINE_RAM_Y_F32_ID,mgr,3);
 	      outputX.reload(SupportTestsF32::OUTPUT_SPLINE_RAM_X_F32_ID,mgr,30);
 	      ref.reload(SupportTestsF32::REF_SPLINE_RAM_F32_ID,mgr,30);
+          splineCoefs.create(3*(3-1),SupportTestsF32::COEFS_SPLINE_F32_ID,mgr);
+          buffer.create(2*3-1,SupportTestsF32::TEMP_SPLINE_F32_ID,mgr);
 	      output.create(30,SupportTestsF32::OUT_F32_ID,mgr);
 	    break;

--- a/Testing/desc.txt
+++ b/Testing/desc.txt
@ -313,6 +313,8 @@ group Root {
              Pattern SAMPLES_Q7_ID : Samples5_q7.txt 

              Output  OUT_F32_ID : Output
+              Output  COEFS_SPLINE_F32_ID : SplineCoefs
+              Output  TEMP_SPLINE_F32_ID : SplineTemp

              Functions {
                test_weighted_sum_f32 nb=3:test_weighted_sum_f32