CMSIS-DSP: Added new MVE implementation of FIR Q31

5 years ago · a108d6763e
parent 68b219bb1f
commit a108d6763e
7 changed files with 611 additions and 469 deletions
--- a/Include/dsp/filtering_functions_f16.h
+++ b/Include/dsp/filtering_functions_f16.h
@ -40,8 +40,6 @@ extern "C"

 #if defined(ARM_FLOAT16_SUPPORTED)

-#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))
-
 /**
   * @brief Instance structure for the floating-point FIR filter.
   */
--- a/Include/dsp/utils.h
+++ b/Include/dsp/utils.h
@ -42,6 +42,7 @@ extern "C"

 #define SQ(x) ((x) * (x))

+#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))


  /**
--- a/Source/FilteringFunctions/arm_fir_f32.c
+++ b/Source/FilteringFunctions/arm_fir_f32.c
@ -110,9 +110,11 @@
                 The first A samples are temporary data.
                 The remaining samples are the state of the FIR filter.
  @par                 
-                 So the state buffer has size <code> numTaps + A * blockSize - 1 </code> :
+                 So the state buffer has size <code> numTaps + A + blockSize - 1 </code> :
                 - A is blockSize for f32
                 - A is 8*ceil(blockSize/8) for f16
+                 - A is 8*ceil(blockSize/4) for q31
+

  @par           Fixed-Point Behavior
                   Care must be taken when using the fixed-point versions of the FIR filter functions.
@ -200,6 +202,7 @@ __STATIC_INLINE void arm_fir_f32_1_4_mve(const arm_fir_instance_f32 * S,
    }

    blkCnt = blockSize & 3;
+    if (blkCnt)
    {
        mve_pred16_t    p0 = vctp32q(blkCnt);

--- a/Source/FilteringFunctions/arm_fir_init_q31.c
+++ b/Source/FilteringFunctions/arm_fir_init_q31.c
@ -52,7 +52,23 @@
      {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}
  </pre>
                   <code>pState</code> points to the array of state variables.
-                   <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q31()</code>.
+                   <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples (except for Helium - see below), where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q31()</code>.
+
+   @par          Initialization of Helium version
+                   For the Helium version the length of the coefficient array must be a multiple of 16, even if
+                   fewer than 16 coefficients are used. The additional coefficients must be set to 0.
+                   This does not mean that all the coefficients will be used in the filter (numTaps
+                   is still set to its actual value in the init function). It just means that
+                   the implementation may need to read more coefficients because of the vectorization and
+                   to avoid having to manage too many different cases in the code.
+  
+    @par          Helium state buffer
+                   The state buffer must contain some additional temporary data
+                   used during the computation but which is not the state of the FIR.
+                   The first 2*4*ceil(blockSize/4) samples are temporary data.
+                   The remaining samples are the state of the FIR filter.
+                   So the state buffer has size <code> numTaps + 8*ceil(blockSize/4) + blockSize - 1 </code>
+  
 */

 void arm_fir_init_q31(
@ -69,7 +85,11 @@ void arm_fir_init_q31(
  S->pCoeffs = pCoeffs;

  /* Clear state buffer. The size is always (blockSize + numTaps - 1) */
+  #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+  memset(pState, 0, (numTaps + (blockSize - 1U) + 2*ROUND_UP(blockSize, 4)) * sizeof(q31_t));
+  #else
  memset(pState, 0, (numTaps + (blockSize - 1U)) * sizeof(q31_t));
+  #endif

  /* Assign state pointer */
  S->pState = pState;
--- a/Source/FilteringFunctions/arm_fir_q31.c
+++ b/Source/FilteringFunctions/arm_fir_q31.c
--- a/Testing/Source/Benchmarks/FIRQ31.cpp
+++ b/Testing/Source/Benchmarks/FIRQ31.cpp
@ -1,6 +1,9 @@
 #include "FIRQ31.h"
 #include "Error.h"

+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+static __ALIGNED(8) q31_t coeffArray[64];
+#endif 
   
    void FIRQ31::test_fir_q31()
    {
@ -30,16 +33,28 @@
       samples.reload(FIRQ31::SAMPLES1_Q31_ID,mgr,this->nbSamples);
       coefs.reload(FIRQ31::COEFS1_Q31_ID,mgr,this->nbTaps);

-       state.create(this->nbSamples + this->nbTaps - 1,FIRQ31::STATE_Q31_ID,mgr);
+       state.create(2*ROUND_UP(this->nbSamples,4) + this->nbSamples + this->nbTaps - 1,FIRQ31::STATE_Q31_ID,mgr);
       output.create(this->nbSamples,FIRQ31::OUT_SAMPLES_Q31_ID,mgr);

       switch(id)
       {
           case TEST_FIR_Q31_1:
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+              /* Copy the coefficients into the static array and pad the remainder with zeros
+              */
+              memset(coeffArray,0,32*sizeof(q31_t));
+              q31_t *ptr;
+
+              ptr=coefs.ptr();
+              memcpy(coeffArray,ptr,this->nbTaps*sizeof(q31_t));
+              this->pCoefs = coeffArray;
+#else
+              this->pCoefs=coefs.ptr();
+#endif
+
              arm_fir_init_q31(&instFir,this->nbTaps,coefs.ptr(),state.ptr(),this->nbSamples);

              this->pSrc=samples.ptr();
-              this->pCoefs=coefs.ptr();
              this->pDst=output.ptr();
           break;

--- a/Testing/Source/Tests/FIRQ31.cpp
+++ b/Testing/Source/Tests/FIRQ31.cpp
@ -37,6 +37,7 @@ void checkInnerTail(q31_t *b)
 #endif
        int blockSize;
        int numTaps;
+        int nb=1;

        /*

@ -98,6 +99,8 @@ void checkInnerTail(q31_t *b)
           configp += 2;
           orgcoefsp += numTaps;

+           nb += blockSize + blockSize;
+

        }

@ -129,8 +132,8 @@ void checkInnerTail(q31_t *b)
       ref.reload(FIRQ31::FIRREFS_Q31_ID,mgr);

       output.create(ref.nbSamples(),FIRQ31::OUT_Q31_ID,mgr);
-       /* Max blockSize + numTaps - 1 as generated by Python script */
-       state.create(47,FIRQ31::OUT_Q31_ID,mgr);
+       /* Larger than the max blockSize + numTaps - 1 generated by the Python script: extra room for the Helium temporary samples */
+       state.create(47 + 47+47,FIRQ31::OUT_Q31_ID,mgr);
    }

    void FIRQ31::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)