CMSIS-DSP: Improvement to FIR documentation and tests.

5 years ago · 630122ae1b
parent e9a8ba6255
commit 630122ae1b
10 changed files with 110 additions and 20 deletions
--- a/Source/FilteringFunctions/arm_fir_init_f16.c
+++ b/Source/FilteringFunctions/arm_fir_init_f16.c
@ -54,10 +54,10 @@
                   <code>pState</code> points to the array of state variables.
                   <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples (except for Helium - see below), where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_f16()</code>.
  @par          Initialization of Helium version
-                 For Helium version the array of coefficients must be a multiple of 16 even if less
-                 then 16 coefficients are used. The additional coefficients must be set to 0.
-                 It does not mean that all the coefficients will be used in the filter (numTaps
-                 is still set to its right value in the init function.) It just means that
+                 For the Helium version, the length of the array of coefficients must be a multiple of 4 (4a), even if fewer
+                 than 4a coefficients are defined in the FIR. The additional coefficients
+                 (4a - numTaps) must be set to 0.
+                 numTaps is still set to its actual value in the init function. The padding is needed because
                 the implementation may require to read more coefficients due to the vectorization and
                 to avoid having to manage too many different cases in the code.

--- a/Source/FilteringFunctions/arm_fir_init_f32.c
+++ b/Source/FilteringFunctions/arm_fir_init_f32.c
@ -55,10 +55,10 @@
                   <code>pState</code> points to the array of state variables and some working memory for the Helium version.
                   <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples (except for Helium - see below), where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_f32()</code>.
  @par          Initialization of Helium version
-                 For Helium version the array of coefficients must be a multiple of 16 even if less
-                 then 16 coefficients are used. The additional coefficients must be set to 0.
-                 It does not mean that all the coefficients will be used in the filter (numTaps
-                 is still set to its right value in the init function.) It just means that
+                 For the Helium version, the length of the array of coefficients must be a multiple of 4 (4a), even if fewer
+                 than 4a coefficients are defined in the FIR. The additional coefficients
+                 (4a - numTaps) must be set to 0.
+                 numTaps is still set to its actual value in the init function. The padding is needed because
                 the implementation may require to read more coefficients due to the vectorization and
                 to avoid having to manage too many different cases in the code.

--- a/Source/FilteringFunctions/arm_fir_init_q15.c
+++ b/Source/FilteringFunctions/arm_fir_init_q15.c
@ -73,6 +73,14 @@
  </pre>
                   <code>pState</code> points to the array of state variables.
                   <code>pState</code> is of length <code>numTaps+blockSize</code>, when running on Cortex-M4 and Cortex-M3  and is of length <code>numTaps+blockSize-1</code>, when running on Cortex-M0 where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q15()</code>.
+ 
+  @par          Initialization of Helium version
+                   For the Helium version, the length of the array of coefficients must be a multiple of 8 (8a), even if fewer
+                   than 8a coefficients are defined in the FIR. The additional coefficients
+                   (8a - numTaps) must be set to 0.
+                   numTaps is still set to its actual value in the init function. The padding is needed because
+                   the implementation may require to read more coefficients due to the vectorization and
+                   to avoid having to manage too many different cases in the code.
 */

 arm_status arm_fir_init_q15(
--- a/Source/FilteringFunctions/arm_fir_init_q31.c
+++ b/Source/FilteringFunctions/arm_fir_init_q31.c
@ -55,10 +55,10 @@
                   <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples (except for Helium - see below), where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q31()</code>.

   @par          Initialization of Helium version
-                   For Helium version the array of coefficients must be a multiple of 16 even if less
-                   then 16 coefficients are used. The additional coefficients must be set to 0.
-                   It does not mean that all the coefficients will be used in the filter (numTaps
-                   is still set to its right value in the init function.) It just means that
+                   For the Helium version, the length of the array of coefficients must be a multiple of 4 (4a), even if fewer
+                   than 4a coefficients are defined in the FIR. The additional coefficients
+                   (4a - numTaps) must be set to 0.
+                   numTaps is still set to its actual value in the init function. The padding is needed because
                   the implementation may require to read more coefficients due to the vectorization and
                   to avoid having to manage too many different cases in the code.
  
--- a/Source/FilteringFunctions/arm_fir_init_q7.c
+++ b/Source/FilteringFunctions/arm_fir_init_q7.c
@ -54,6 +54,15 @@
  @par
                   <code>pState</code> points to the array of state variables.
                   <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q7()</code>.
+  
+  @par          Initialization of Helium version
+                   For the Helium version, the length of the array of coefficients must be a multiple of 16 (16a), even if fewer
+                   than 16a coefficients are defined in the FIR. The additional coefficients
+                   (16a - numTaps) must be set to 0.
+                   numTaps is still set to its actual value in the init function. The padding is needed because
+                   the implementation may require to read more coefficients due to the vectorization and
+                   to avoid having to manage too many different cases in the code.
+
 */

 void arm_fir_init_q7(
--- a/Testing/Source/Tests/FIRF16.cpp
+++ b/Testing/Source/Tests/FIRF16.cpp
@ -24,6 +24,9 @@ static void checkInnerTail(float16_t *b)
    ASSERT_TRUE(b[3] == 0.0f);
 }

+// Coef must be padded to a multiple of 4
+#define FIRCOEFPADDING 2
+
    void FIRF16::test_fir_f16()
    {
        
@ -42,6 +45,7 @@ static void checkInnerTail(float16_t *b)
 #endif
        int blockSize;
        int numTaps;
+        int round;

        

@ -59,9 +63,20 @@ static void checkInnerTail(float16_t *b)
           numTaps = configp[1];

 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-           /* Copy coefficients and pad to zero 
+            /* Copy coefficients and pad to zero 
           */
-           memset(coeffArray,0,32*sizeof(float16_t));
+           memset(coeffArray,127,32*sizeof(float16_t));
+           round = numTaps >> FIRCOEFPADDING;
+           if ((round << FIRCOEFPADDING) < numTaps)
+           {
+             round ++;
+           }
+           round = round<<FIRCOEFPADDING;
+           memset(coeffArray,0,round*sizeof(float16_t));
+
+           //printf("blockSize=%d, numTaps=%d, round=%d (%d)\n",blockSize,numTaps,round,round - numTaps);
+
+
           for(j=0;j < numTaps; j++)
           {
              coeffArray[j] = orgcoefsp[j];
--- a/Testing/Source/Tests/FIRF32.cpp
+++ b/Testing/Source/Tests/FIRF32.cpp
@ -24,6 +24,8 @@ static void checkInnerTail(float32_t *b)
    ASSERT_TRUE(b[3] == 0.0f);
 }

+// Coef must be padded to a multiple of 4
+#define FIRCOEFPADDING 2

    void FIRF32::test_fir_f32()
    {
@ -44,6 +46,7 @@ static void checkInnerTail(float32_t *b)
 #endif
        int blockSize;
        int numTaps;
+        int round;
        int nb=0;

        
@ -61,12 +64,24 @@ static void checkInnerTail(float32_t *b)
           blockSize = configp[0];
           numTaps = configp[1];

+
           nb += 2*blockSize;

 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
           /* Copy coefficients and pad to zero 
           */
-           memset(coeffArray,0,32*sizeof(float32_t));
+           memset(coeffArray,127,32*sizeof(float32_t));
+           round = numTaps >> FIRCOEFPADDING;
+           if ((round << FIRCOEFPADDING) < numTaps)
+           {
+             round ++;
+           }
+           round = round<<FIRCOEFPADDING;
+           memset(coeffArray,0,round*sizeof(float32_t));
+
+           //printf("blockSize=%d, numTaps=%d, round=%d (%d)\n",blockSize,numTaps,round,round - numTaps);
+
+
           for(j=0;j < numTaps; j++)
           {
              coeffArray[j] = orgcoefsp[j];
--- a/Testing/Source/Tests/FIRQ15.cpp
+++ b/Testing/Source/Tests/FIRQ15.cpp
@ -18,6 +18,9 @@ static void checkInnerTail(q15_t *b)
    ASSERT_TRUE(b[3] == 0);
 }

+// Coef must be padded to a multiple of 8
+#define FIRCOEFPADDING 3
+
    void FIRQ15::test_fir_q15()
    {
        
@ -36,6 +39,7 @@ static void checkInnerTail(q15_t *b)
 #endif
        int blockSize;
        int numTaps;
+        int round;

        /*

@ -51,9 +55,19 @@ static void checkInnerTail(q15_t *b)
           numTaps = configp[1];

 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-           /* Copy coefficients and pad to zero 
+            /* Copy coefficients and pad to zero 
           */
-           memset(coeffArray,0,32*sizeof(q15_t));
+           memset(coeffArray,127,32*sizeof(q15_t));
+           round = numTaps >> FIRCOEFPADDING;
+           if ((round << FIRCOEFPADDING) < numTaps)
+           {
+             round ++;
+           }
+           round = round<<FIRCOEFPADDING;
+           memset(coeffArray,0,round*sizeof(q15_t));
+
+           //printf("blockSize=%d, numTaps=%d, round=%d (%d)\n",blockSize,numTaps,round,round - numTaps);
+
           for(j=0;j < numTaps; j++)
           {
              coeffArray[j] = orgcoefsp[j];
--- a/Testing/Source/Tests/FIRQ31.cpp
+++ b/Testing/Source/Tests/FIRQ31.cpp
@ -18,6 +18,8 @@ static void checkInnerTail(q31_t *b)
    ASSERT_TRUE(b[3] == 0);
 }

+// Coef must be padded to a multiple of 4
+#define FIRCOEFPADDING 2

    void FIRQ31::test_fir_q31()
    {
@ -37,6 +39,7 @@ static void checkInnerTail(q31_t *b)
 #endif
        int blockSize;
        int numTaps;
+        int round;
        int nb=1;

        /*
@ -55,7 +58,18 @@ static void checkInnerTail(q31_t *b)
 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
           /* Copy coefficients and pad to zero 
           */
-           memset(coeffArray,0,32*sizeof(q31_t));
+           memset(coeffArray,127,32*sizeof(q31_t));
+           round = numTaps >> FIRCOEFPADDING;
+           if ((round << FIRCOEFPADDING) < numTaps)
+           {
+             round ++;
+           }
+           round = round<<FIRCOEFPADDING;
+           memset(coeffArray,0,round*sizeof(q31_t));
+
+           //printf("blockSize=%d, numTaps=%d, round=%d (%d)\n",blockSize,numTaps,round,round - numTaps);
+
+
           for(j=0;j < numTaps; j++)
           {
              coeffArray[j] = orgcoefsp[j];
--- a/Testing/Source/Tests/FIRQ7.cpp
+++ b/Testing/Source/Tests/FIRQ7.cpp
@ -18,6 +18,9 @@ static void checkInnerTail(q7_t *b)
    ASSERT_TRUE(b[3] == 0);
 }

+// Coef must be padded to a multiple of 16
+#define FIRCOEFPADDING 4
+
    void FIRQ7::test_fir_q7()
    {
        
@ -36,6 +39,7 @@ static void checkInnerTail(q7_t *b)
 #endif
        int blockSize;
        int numTaps;
+        int round;

        /*

@ -51,9 +55,20 @@ static void checkInnerTail(q7_t *b)
           numTaps = configp[1];

 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-           /* Copy coefficients and pad to zero 
+            /* Copy coefficients and pad to zero 
           */
-           memset(coeffArray,0,32*sizeof(q7_t));
+           memset(coeffArray,127,32*sizeof(q7_t));
+           round = numTaps >> FIRCOEFPADDING;
+           if ((round << FIRCOEFPADDING) < numTaps)
+           {
+             round ++;
+           }
+           round = round<<FIRCOEFPADDING;
+           memset(coeffArray,0,round*sizeof(q7_t));
+
+           //printf("blockSize=%d, numTaps=%d, round=%d (%d)\n",blockSize,numTaps,round,round - numTaps);
+
+
           for(j=0;j < numTaps; j++)
           {
              coeffArray[j] = orgcoefsp[j];