Corrected spline interpolation and merge sort

6 years ago · 169877344c
parent 76153d838d
commit 169877344c
9 changed files with 154 additions and 98 deletions
--- a/Include/arm_math.h
+++ b/Include/arm_math.h
@ -2264,11 +2264,9 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
             /**< Heap sort      */
    ARM_SORT_INSERTION = 3,
             /**< Insertion sort */
-    ARM_SORT_MERGE     = 4,
+    ARM_SORT_QUICK     = 4,
             /**< Merge sort     */
    ARM_SORT_QUICK     = 5,
             /**< Quick sort     */
-    ARM_SORT_SELECTION = 6
+    ARM_SORT_SELECTION = 5
             /**< Selection sort */
  } arm_sort_alg;
@ -2314,6 +2312,37 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
    arm_sort_alg alg, 
    arm_sort_dir dir); 
  /**
   * @brief Instance structure for the sorting algorithms.
   */
  typedef struct            
  {
    arm_sort_dir dir;        /**< Sorting order (direction)  */
    float32_t * buffer;      /**< Working buffer */
  } arm_merge_sort_instance_f32;  
  /**
   * @param[in]      S          points to an instance of the sorting structure.
   * @param[in,out]  pSrc       points to the block of input data.
   * @param[out]     pDst       points to the block of output data
   * @param[in]      blockSize  number of samples to process.
   */
  void arm_merge_sort_f32(
    const arm_merge_sort_instance_f32 * S,
          float32_t *pSrc,
          float32_t *pDst,
          uint32_t blockSize);
  /**
   * @param[in,out]  S            points to an instance of the sorting structure.
   * @param[in]      dir          Sorting order.
   * @param[in]      buffer       Working buffer.
   */
  void arm_merge_sort_init_f32(
    arm_merge_sort_instance_f32 * S,
    arm_sort_dir dir,
    float32_t * buffer);
  /**
   * @brief Struct for specifying cubic spline type
   */
@ -2330,6 +2359,7 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
  {
    uint32_t n_x;              /**< Number of known data points */
    arm_spline_type type;      /**< Type (boundary conditions) */
    float32_t * buffer;
  } arm_spline_instance_f32;
  /**
@ -2347,7 +2377,7 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
    const float32_t * y,
    const float32_t * xq,
          float32_t * pDst,
-	  uint32_t blockSize);
+          uint32_t blockSize);
  /**
   * @brief Initialization function for the floating-point cubic spline interpolation.
@ -2358,7 +2388,8 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
  void arm_spline_init_f32(
    arm_spline_instance_f32 * S,
    uint32_t n,
-    arm_spline_type type);
+    arm_spline_type type,
    float32_t * buffer);
  /**
   * @brief Instance structure for the floating-point matrix structure.
--- a/PrivateInclude/arm_sorting.h
+++ b/PrivateInclude/arm_sorting.h
@ -68,18 +68,6 @@ extern "C"
          float32_t* pDst, 
    uint32_t blockSize);
  /**
   * @param[in]  S          points to an instance of the sorting structure.
   * @param[in]  pSrc       points to the block of input data.
   * @param[out] pDst       points to the block of output data
   * @param[in]  blockSize  number of samples to process.
   */
  void arm_merge_sort_f32(
    const arm_sort_instance_f32 * S, 
          float32_t *pSrc, 
          float32_t *pDst, 
    uint32_t blockSize);
  /**
   * @param[in]  S          points to an instance of the sorting structure.
   * @param[in]  pSrc       points to the block of input data.
--- a/Source/SupportFunctions/arm_merge_sort_f32.c
+++ b/Source/SupportFunctions/arm_merge_sort_f32.c
@ -92,24 +92,35 @@ static void arm_merge_sort_core_f32(float32_t * pB, uint32_t begin, uint32_t end
   *               divide the input array in sublists and merge them to produce
   *               longer sorted sublists until there is only one list remaining.
   *
-   * @par          A work array is always needed, hence pSrc and pDst cannot be
+   * @par          A work array is always needed. It must be allocated by the user 
-   *               equal and the results will be stored in pDst.
+   *               linked to the instance at initialization time.
   *
   * @par          It's an in-place algorithm. In order to obtain an out-of-place
   *               function, a memcpy of the source vector is performed
   */
 void arm_merge_sort_f32(
-  const arm_sort_instance_f32 * S, 
+  const arm_merge_sort_instance_f32 * S, 
        float32_t *pSrc, 
        float32_t *pDst, 
        uint32_t blockSize)
 {
-    // A work array is needed: pDst
+    float32_t * pA;
    if(pSrc != pDst) // out-of-place only
    {
 	memcpy(pDst, pSrc, blockSize*sizeof(float32_t));
-	arm_merge_sort_core_f32(pSrc, 0, blockSize, pDst, S->dir);
+    /* Out-of-place */ 
    if(pSrc != pDst)
    {
        memcpy(pDst, pSrc, blockSize*sizeof(float32_t));
        pA = pDst;
    }
    else
        pA = pSrc;
    /* A working buffer is needed */
    memcpy(S->buffer, pSrc, blockSize*sizeof(float32_t));
    arm_merge_sort_core_f32(S->buffer, 0, blockSize, pA, S->dir);
 }
 /**
  @} end of Sorting group
--- a/Source/SupportFunctions/arm_merge_sort_init_f32.c
+++ b/Source/SupportFunctions/arm_merge_sort_init_f32.c
@ -0,0 +1,35 @@
 /* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_merge_sort_init_f32.c
 * Description:  Floating point merge sort initialization function
 *
 * $Date:        2019
 * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M and Cortex-A cores
 * -------------------------------------------------------------------- */
 /*
 * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "arm_math.h"
 void arm_merge_sort_init_f32(arm_merge_sort_instance_f32 * S, arm_sort_dir dir, float32_t * buffer)
 {
    S->dir    = dir;
    S->buffer = buffer;
 }
--- a/Source/SupportFunctions/arm_sort_f32.c
+++ b/Source/SupportFunctions/arm_sort_f32.c
@ -66,10 +66,6 @@ void arm_sort_f32(
        arm_insertion_sort_f32(S, pSrc, pDst, blockSize);
        break;
        case ARM_SORT_MERGE:
        arm_merge_sort_f32(S, pSrc, pDst, blockSize);
        break;
        case ARM_SORT_QUICK:
        arm_quick_sort_f32(S, pSrc, pDst, blockSize);
        break;
--- a/Source/SupportFunctions/arm_spline_interp_f32.c
+++ b/Source/SupportFunctions/arm_spline_interp_f32.c
@ -163,28 +163,16 @@ void arm_spline_f32(
  const float32_t * y,
  const float32_t * xq,
        float32_t * pDst,
-	uint32_t blockSize)
+        uint32_t blockSize)
 {
    /* 
    As explained in arm_spline_interp_init_f32.c, this must be > 1
    */
    int32_t n = S->n_x;
    arm_spline_type type = S->type;
    float32_t hi, hm1;
-
+    float32_t * u = (S->buffer);     /* (n-1)-long buffer for u elements */
-    /* 
+    float32_t * z = (S->buffer)+(n-1);   /* n-long buffer for z elements */
-
+    float32_t * c = (S->buffer)+(n+n-1); /* n-long buffer for c elements */
    Temporary variables for system AX=B.
    This will be replaced by new arguments for the function.
     */
    float32_t u[MAX_DATA_POINTS], z[MAX_DATA_POINTS], c[MAX_DATA_POINTS];
    float32_t Bi, li;
    float32_t bi, di;
    float32_t x_sc;
@ -231,7 +219,7 @@ void arm_spline_f32(
        hi = x[i+1]-x[i];
        Bi = 3*(y[i+1]-y[i])/hi - 3*(y[i]-y[i-1])/hm1;
        /* l(i) = a(ii)-a(i,i-1)*u(i-1) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */
-	li = 2*(hi+hm1) - hm1*u[i-1];
+        li = 2*(hi+hm1) - hm1*u[i-1];
        /* u(i) = a(i,i+1)/l(i) = h(i)/l(i) */
        u[i] = hi/li;
        /* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */
@ -264,12 +252,12 @@ void arm_spline_f32(
    /* === Compute b(i) and d(i) from c(i) and create output for x(i)<x<x(i+1) === */
    for (i=0; i<n-1; i++)
    {
-	hi = x[i+1]-x[i];
+        hi = x[i+1]-x[i];
        bi = (y[i+1]-y[i])/hi-hi*(c[i+1]+2*c[i])/3;
        di = (c[i+1]-c[i])/(3*hi);
 #ifdef ARM_MATH_NEON
-	xiv = vdupq_n_f32(x[i]);
+        xiv = vdupq_n_f32(x[i]);
        aiv = vdupq_n_f32(y[i]);
        biv = vdupq_n_f32(bi);
@ -277,42 +265,42 @@ void arm_spline_f32(
        div = vdupq_n_f32(di);
        while( *(pXq+4) <= x[i+1] && blkCnt > 4 )
-	{
+        {
-	    /* Load [xq(k) xq(k+1) xq(k+2) xq(k+3)] */
+            /* Load [xq(k) xq(k+1) xq(k+2) xq(k+3)] */
            xqv = vld1q_f32(pXq);
-	    pXq+=4;
+            pXq+=4;
-	    /* Compute [xq(k)-x(i) xq(k+1)-x(i) xq(k+2)-x(i) xq(k+3)-x(i)] */
+            /* Compute [xq(k)-x(i) xq(k+1)-x(i) xq(k+2)-x(i) xq(k+3)-x(i)] */
            diff = vsubq_f32(xqv, xiv);
-	    temp = diff;
+            temp = diff;
-	    /* y(i) = a(i) + ... */
+            /* y(i) = a(i) + ... */
            yv = aiv;
-	    /* ... + b(i)*(x-x(i)) + ... */
+            /* ... + b(i)*(x-x(i)) + ... */
-	    yv = vmlaq_f32(yv, biv, temp);
+            yv = vmlaq_f32(yv, biv, temp);
-	    /* ... + c(i)*(x-x(i))^2 + ... */
+            /* ... + c(i)*(x-x(i))^2 + ... */
-	    temp = vmulq_f32(temp, diff);
+            temp = vmulq_f32(temp, diff);
-	    yv = vmlaq_f32(yv, civ, temp);
+            yv = vmlaq_f32(yv, civ, temp);
            /* ... + d(i)*(x-x(i))^3 */
-	    temp = vmulq_f32(temp, diff);
+            temp = vmulq_f32(temp, diff);
-	    yv = vmlaq_f32(yv, div, temp);
+            yv = vmlaq_f32(yv, div, temp);
            /* Store [y(k) y(k+1) y(k+2) y(k+3)] */
-	    vst1q_f32(pDst, yv);
+            vst1q_f32(pDst, yv);
-	    pDst+=4;
+            pDst+=4;
-	    blkCnt-=4;
+            blkCnt-=4;
-	}
+        }
 #endif
        while( *pXq <= x[i+1] && blkCnt > 0 )
-	{
+        {
-	    x_sc = *pXq++;
+            x_sc = *pXq++;
            *pDst = y[i]+bi*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+di*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]);
            pDst++;
-	    blkCnt--;
+            blkCnt--;
-	}
+        }
    }
    /* == Create output for remaining samples (x>=x(n)) == */
--- a/Source/SupportFunctions/arm_spline_interp_init_f32.c
+++ b/Source/SupportFunctions/arm_spline_interp_init_f32.c
@ -47,15 +47,16 @@
 void arm_spline_init_f32(
    arm_spline_instance_f32 * S, 
    uint32_t n, 
-    arm_spline_type type)
+    arm_spline_type type,
    float32_t * buffer)
 {
-    S->n_x    = n;
+    S->n_x  = n;
-    S->type   = type;
+    S->type = type;
    /* Type (boundary conditions):
-     *   0: Natural spline          ( S1''(x1)=0 ; Sn''(xn)=0 )
+     *  - Natural spline          ( S1''(x1)=0 ; Sn''(xn)=0 )
-     *   1: Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) )
+     *  - Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) )
     */
    S->buffer = buffer;
 }
 /**
--- a/Testing/Include/Tests/SupportTestsF32.h
+++ b/Testing/Include/Tests/SupportTestsF32.h
@ -12,9 +12,10 @@ class SupportTestsF32:public Client::Suite
            Client::Pattern<float32_t> input;
            Client::Pattern<float32_t> coefs;
-	    Client::Pattern<float32_t> inputX;
+            Client::Pattern<float32_t> inputX;
-	    Client::Pattern<float32_t> inputY;
+            Client::Pattern<float32_t> inputY;
-	    Client::Pattern<float32_t> outputX;
+            Client::Pattern<float32_t> outputX;
            Client::Pattern<float32_t> buffer;
            Client::LocalPattern<float32_t> output;
            Client::LocalPattern<q15_t> outputQ15;
--- a/Testing/Source/Tests/SupportTestsF32.cpp
+++ b/Testing/Source/Tests/SupportTestsF32.cpp
@ -297,11 +297,12 @@
    {
       float32_t *inp = input.ptr();
       float32_t *outp = output.ptr();
-       arm_sort_instance_f32 S;
+       float32_t *buf = buffer.ptr();
-
+       buf = (float32_t *)malloc((this->nbSamples)*sizeof(float32_t) );
-       arm_sort_init_f32(&S, ARM_SORT_MERGE, ARM_SORT_ASCENDING);
+       arm_merge_sort_instance_f32 S;
-       arm_sort_f32(&S,inp,outp,this->nbSamples);
+       arm_merge_sort_init_f32(&S, ARM_SORT_ASCENDING, buf);
       arm_merge_sort_f32(&S,inp,outp,this->nbSamples);
       ASSERT_EMPTY_TAIL(output);
@ -313,11 +314,12 @@
    {
       float32_t *inp = input.ptr();
       float32_t *outp = output.ptr();
-       arm_sort_instance_f32 S;
+       float32_t *buf = buffer.ptr();
       buf = (float32_t *)malloc((this->nbSamples)*sizeof(float32_t) );
       arm_merge_sort_instance_f32 S;
-       arm_sort_init_f32(&S, ARM_SORT_MERGE, ARM_SORT_ASCENDING);
+       arm_merge_sort_init_f32(&S, ARM_SORT_ASCENDING, buf);
-
+       arm_merge_sort_f32(&S,inp,outp,this->nbSamples);
       arm_sort_f32(&S,inp,outp,this->nbSamples);
       ASSERT_EMPTY_TAIL(output);
@ -424,10 +426,11 @@
       const float32_t *inpY = inputY.ptr();
       const float32_t *outX = outputX.ptr();
       float32_t *outp = output.ptr();
-
+       float32_t *buf = buffer.ptr();
       buf=(float32_t*)malloc((3*4-1)*sizeof(float32_t));
       arm_spline_instance_f32 S;
-       arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT);
+       arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT, buf);
       arm_spline_f32(&S, inpX, inpY, outX, outp, 20);
       ASSERT_EMPTY_TAIL(output);
@ -440,10 +443,11 @@
       const float32_t *inpY = inputY.ptr();
       const float32_t *outX = outputX.ptr();
       float32_t *outp = output.ptr();
-
+       float32_t *buf = buffer.ptr();
       buf=(float32_t*)malloc((3*9-1)*sizeof(float32_t));
       arm_spline_instance_f32 S;
-       arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL);
+       arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL, buf);
       arm_spline_f32(&S, inpX, inpY, outX, outp, 33);
       ASSERT_EMPTY_TAIL(output);
@ -456,10 +460,11 @@
       const float32_t *inpY = inputY.ptr();
       const float32_t *outX = outputX.ptr();
       float32_t *outp = output.ptr();
-
+       float32_t *buf = buffer.ptr();
       buf=(float32_t*)malloc((3*3-1)*sizeof(float32_t));
       arm_spline_instance_f32 S;
-       arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT);
+       arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT, buf);
       arm_spline_f32(&S, inpX, inpY, outX, outp, 30);
       ASSERT_EMPTY_TAIL(output);