CMSIS-DSP: Correction of issue 1217

Fixed wrong initialization code for the Neon version of biquad DF2T: the initialization function was trying to modify a const array. Added the Neon function to the Doxygen output and made some corrections required by Doxygen.
5 years ago · 2a2f745bd3
parent 5fac45cc96
commit 2a2f745bd3
5 changed files with 181 additions and 142 deletions
--- a/Include/dsp/filtering_functions.h
+++ b/Include/dsp/filtering_functions.h
@ -1173,10 +1173,17 @@ arm_status arm_fir_decimate_init_f32(
 #if defined(ARM_MATH_NEON) 
 /**
  @brief         Compute new coefficient arrays for use in vectorized filter (Neon only).
  @param[in]     numStages         number of 2nd order stages in the filter.
  @param[in]     pCoeffs           points to the original filter coefficients.
  @param[out]    pComputedCoeffs   points to the new computed coefficients for the vectorized version.
  @return        none
 */
 void arm_biquad_cascade_df2T_compute_coefs_f32(
  arm_biquad_cascade_df2T_instance_f32 * S,
  uint8_t numStages,
-  const float32_t * pCoeffs);
+  const float32_t * pCoeffs,
  float32_t * pComputedCoeffs);
 #endif
  /**
   * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
@ -37,78 +37,32 @@
  @{
 */
 /**
  @brief         Initialization function for the floating-point transposed direct form II Biquad cascade filter.
  @param[in,out] S           points to an instance of the filter data structure.
  @param[in]     numStages   number of 2nd order stages in the filter.
  @param[in]     pCoeffs     points to the filter coefficients.
  @param[in]     pState      points to the state buffer.
  @return        none
  @par           Coefficient and State Ordering
                   The coefficients are stored in the array <code>pCoeffs</code> in the following order
                   in the non-Neon version.
  <pre>
      {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
  </pre>
  @par
                   where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
                   <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
                   and so on.  The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
                   For the Neon version, this array is bigger. If numStages = 4x + y, then the array has size:
                   32*x + 5*y
                   and it must be initialized using the function
                   arm_biquad_cascade_df2T_compute_coefs_f32, which takes the
                   standard coefficient array as a parameter.
                   An array of 8*numStages values is always large enough.
                   Then, the initialization can be done with:
  <pre>
                   arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, neonCoefs, stateNeon);
                   arm_biquad_cascade_df2T_compute_coefs_f32(&SNeon,nbCascade,coefs);
  </pre>
  @par             In this example, neonCoefs is a bigger array of size 8 * numStages.
                   coefs is the standard array:
  <pre>
      {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
  </pre>
  @par
                   The <code>pState</code> is a pointer to state array.
                   Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
                   The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
                   The state array has a total length of <code>2*numStages</code> values.
                   The state variables are updated after each block of data is processed; the coefficients are untouched.
 */
 #if defined(ARM_MATH_NEON) 
-/*
+/**
  @brief         Compute new coefficient arrays for use in vectorized filter (Neon only).
  @param[in]     numStages         number of 2nd order stages in the filter.
  @param[in]     pCoeffs           points to the original filter coefficients.
  @param[out]    pComputedCoeffs   points to the new computed coefficients for the vectorized Neon version.
  @return        none
  @par   Size of coefficient arrays:
            pCoeffs has size 5 * numStages 
-Must be called after initializing the biquad instance.
+            pComputedCoeffs has size 8 * numStages
 pCoeffs has size 5 * nbCascade
 Whereas the pCoeffs for the init has size (4*4 + 4*4)* nbCascade 
-So this pCoeffs is the one which would be used for the not Neon version.
+            pComputedCoeffs is the array to be used in arm_biquad_cascade_df2T_init_f32.
 The pCoeffs passed in init is bigger than the one for the not Neon version.
 */
 void arm_biquad_cascade_df2T_compute_coefs_f32(
  arm_biquad_cascade_df2T_instance_f32 * S,
  uint8_t numStages,
-  const float32_t * pCoeffs)
+  const float32_t * pCoeffs,
  float32_t * pComputedCoeffs)
 {
   uint8_t cnt;
   float32_t *pDstCoeffs;
   float32_t b0[4],b1[4],b2[4],a1[4],a2[4];
   pDstCoeffs = (float32_t*)S->pCoeffs;
   cnt = numStages >> 2; 
   while(cnt > 0)
   {
@ -123,52 +77,52 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
      }
      /* Vec 1 */
-      *pDstCoeffs++ = 0;
+      *pComputedCoeffs++ = 0;
-      *pDstCoeffs++ = b0[1];
+      *pComputedCoeffs++ = b0[1];
-      *pDstCoeffs++ = b0[2];
+      *pComputedCoeffs++ = b0[2];
-      *pDstCoeffs++ = b0[3];
+      *pComputedCoeffs++ = b0[3];
      /* Vec 2 */
-      *pDstCoeffs++ = 0;
+      *pComputedCoeffs++ = 0;
-      *pDstCoeffs++ = 0;
+      *pComputedCoeffs++ = 0;
-      *pDstCoeffs++ = b0[1] * b0[2];
+      *pComputedCoeffs++ = b0[1] * b0[2];
-      *pDstCoeffs++ = b0[2] * b0[3];
+      *pComputedCoeffs++ = b0[2] * b0[3];
      /* Vec 3 */
-      *pDstCoeffs++ = 0;
+      *pComputedCoeffs++ = 0;
-      *pDstCoeffs++ = 0;
+      *pComputedCoeffs++ = 0;
-      *pDstCoeffs++ = 0;
+      *pComputedCoeffs++ = 0;
-      *pDstCoeffs++ = b0[1] * b0[2] * b0[3];
+      *pComputedCoeffs++ = b0[1] * b0[2] * b0[3];
      /* Vec 4 */
-      *pDstCoeffs++ = b0[0];
+      *pComputedCoeffs++ = b0[0];
-      *pDstCoeffs++ = b0[0] * b0[1];
+      *pComputedCoeffs++ = b0[0] * b0[1];
-      *pDstCoeffs++ = b0[0] * b0[1] * b0[2];
+      *pComputedCoeffs++ = b0[0] * b0[1] * b0[2];
-      *pDstCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3];
+      *pComputedCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3];
      /* Vec 5 */
-      *pDstCoeffs++ = b1[0];
+      *pComputedCoeffs++ = b1[0];
-      *pDstCoeffs++ = b1[1];
+      *pComputedCoeffs++ = b1[1];
-      *pDstCoeffs++ = b1[2];
+      *pComputedCoeffs++ = b1[2];
-      *pDstCoeffs++ = b1[3];
+      *pComputedCoeffs++ = b1[3];
      /* Vec 6 */
-      *pDstCoeffs++ = b2[0];
+      *pComputedCoeffs++ = b2[0];
-      *pDstCoeffs++ = b2[1];
+      *pComputedCoeffs++ = b2[1];
-      *pDstCoeffs++ = b2[2];
+      *pComputedCoeffs++ = b2[2];
-      *pDstCoeffs++ = b2[3];
+      *pComputedCoeffs++ = b2[3];
      /* Vec 7 */
-      *pDstCoeffs++ = a1[0];
+      *pComputedCoeffs++ = a1[0];
-      *pDstCoeffs++ = a1[1];
+      *pComputedCoeffs++ = a1[1];
-      *pDstCoeffs++ = a1[2];
+      *pComputedCoeffs++ = a1[2];
-      *pDstCoeffs++ = a1[3];
+      *pComputedCoeffs++ = a1[3];
      /* Vec 8 */
-      *pDstCoeffs++ = a2[0];
+      *pComputedCoeffs++ = a2[0];
-      *pDstCoeffs++ = a2[1];
+      *pComputedCoeffs++ = a2[1];
-      *pDstCoeffs++ = a2[2];
+      *pComputedCoeffs++ = a2[2];
-      *pDstCoeffs++ = a2[3];
+      *pComputedCoeffs++ = a2[3];
      cnt--;
   }
@ -176,17 +130,66 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
   cnt = numStages & 0x3;
   while(cnt > 0)
   {
-      *pDstCoeffs++ = *pCoeffs++;
+      *pComputedCoeffs++ = *pCoeffs++;
-      *pDstCoeffs++ = *pCoeffs++;
+      *pComputedCoeffs++ = *pCoeffs++;
-      *pDstCoeffs++ = *pCoeffs++;
+      *pComputedCoeffs++ = *pCoeffs++;
-      *pDstCoeffs++ = *pCoeffs++;
+      *pComputedCoeffs++ = *pCoeffs++;
-      *pDstCoeffs++ = *pCoeffs++;
+      *pComputedCoeffs++ = *pCoeffs++;
      cnt--;
   }
 }
 #endif 
 /**
  @brief         Initialization function for the floating-point transposed direct form II Biquad cascade filter.
  @param[in,out] S           points to an instance of the filter data structure.
  @param[in]     numStages   number of 2nd order stages in the filter.
  @param[in]     pCoeffs     points to the filter coefficients.
  @param[in]     pState      points to the state buffer.
  @return        none
  @par           Coefficient and State Ordering
                   The coefficients are stored in the array <code>pCoeffs</code> in the following order
                   in the non-Neon version.
  <pre>
      {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
  </pre>
  @par
                   where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
                   <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
                   and so on.  The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
                   For the Neon version, this array is bigger. If numStages = 4x + y, then the array has size:
                   32*x + 5*y
                   and it must be initialized using the function
                   arm_biquad_cascade_df2T_compute_coefs_f32, which takes the
                   standard coefficient array as a parameter.
                   An array of 8*numStages values is always large enough.
                   Then, the initialization can be done with:
  <pre>
                   arm_biquad_cascade_df2T_compute_coefs_f32(nbCascade,coefs,computedCoefs);
                   arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, computedCoefs, stateNeon);
  </pre>
  @par             In this example, computedCoefs is a bigger array of size 8 * numStages.
                   coefs is the standard array:
  <pre>
      {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
  </pre>
  @par
                   The <code>pState</code> is a pointer to state array.
                   Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
                   The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
                   The state array has a total length of <code>2*numStages</code> values.
                   The state variables are updated after each block of data is processed; the coefficients are untouched.
 */
 void arm_biquad_cascade_df2T_init_f32(
        arm_biquad_cascade_df2T_instance_f32 * S,
        uint8_t numStages,
--- a/Source/MatrixFunctions/arm_mat_mult_f32.c
+++ b/Source/MatrixFunctions/arm_mat_mult_f32.c
@ -28,6 +28,10 @@
 #include "dsp/matrix_functions.h"
 #if defined(ARM_MATH_NEON)
 #define GROUPOFROWS 8
 #endif
 /**
 * @ingroup groupMatrix
 */
@ -54,14 +58,7 @@
 * @{
 */
-/**
+
 * @brief Floating-point matrix multiplication.
 * @param[in]       *pSrcA points to the first input matrix structure
 * @param[in]       *pSrcB points to the second input matrix structure
 * @param[out]      *pDst points to output matrix structure
 * @return     		The function returns either
 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 */
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
@ -258,6 +255,14 @@ __STATIC_INLINE arm_status arm_mat_mult_f32_4x4_mve(
 }
 /**
 * @brief Floating-point matrix multiplication.
 * @param[in]       *pSrcA points to the first input matrix structure
 * @param[in]       *pSrcB points to the second input matrix structure
 * @param[out]      *pDst points to output matrix structure
 * @return          The function returns either
 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 */
 arm_status arm_mat_mult_f32(
  const arm_matrix_instance_f32 * pSrcA,
  const arm_matrix_instance_f32 * pSrcB,
@ -512,9 +517,14 @@ arm_status arm_mat_mult_f32(
 #else
 #if defined(ARM_MATH_NEON)
-
+/**
-#define GROUPOFROWS 8
+ * @brief Floating-point matrix multiplication.
-
+ * @param[in]       *pSrcA points to the first input matrix structure
 * @param[in]       *pSrcB points to the second input matrix structure
 * @param[out]      *pDst points to output matrix structure
 * @return          The function returns either
 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 */
 arm_status arm_mat_mult_f32(
  const arm_matrix_instance_f32 * pSrcA,
  const arm_matrix_instance_f32 * pSrcB,
@ -843,6 +853,14 @@ arm_status arm_mat_mult_f32(
  return (status);
 }
 #else
 /**
 * @brief Floating-point matrix multiplication.
 * @param[in]       *pSrcA points to the first input matrix structure
 * @param[in]       *pSrcB points to the second input matrix structure
 * @param[out]      *pDst points to output matrix structure
 * @return          The function returns either
 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 */
 arm_status arm_mat_mult_f32(
  const arm_matrix_instance_f32 * pSrcA,
  const arm_matrix_instance_f32 * pSrcB,
--- a/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
+++ b/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
@ -33,8 +33,28 @@
 #include <limits.h>
 #include <math.h>
 #if !defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_AUTOVECTORIZE)
 /*
 _Float16 is not supported in g++ so we avoid putting _Float16 definitions
 in the public headers.
 This function should at some point be moved in FastMath.

 arm_exponent_f16 raises x to the integer power nb by repeated
 multiplication: the result starts at x and is multiplied by x
 (nb - 1) times. When nb <= 1 the loop body never executes and x is
 returned unchanged (so nb == 0 yields x, not 1).

 x   base value
 nb  integer exponent
 Returns x multiplied by itself (nb - 1) times.
 */
 __STATIC_INLINE float16_t arm_exponent_f16(float16_t x, int32_t nb)
 {
    /* Running product, seeded with x (i.e. x to the power 1). */
    float16_t r = x;
    /* One factor of x is already accounted for in r. */
    nb --;
    while(nb > 0)
    {
        /* Operands are cast to _Float16 so the product is formed in that type. */
        r = (_Float16)r * (_Float16)x;
        nb--;
    }
    return(r);
 }
 #endif
 /**
 * @addtogroup polysvm
@ -42,6 +62,13 @@
 */
 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 #include "arm_helium_utils.h"
 #include "arm_vec_math_f16.h"
 /**
 * @brief SVM polynomial prediction
 * @param[in]    S          Pointer to an instance of the polynomial SVM structure.
@ -50,12 +77,6 @@
 * @return none.
 *
 */
 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 #include "arm_helium_utils.h"
 #include "arm_vec_math_f16.h"
 void arm_svm_polynomial_predict_f16(
    const arm_svm_polynomial_instance_f16 *S,
    const float16_t * in,
@ -306,26 +327,15 @@ void arm_svm_polynomial_predict_f16(
 #else
 /*
 _Float16 is not supported in g++ so we avoid putting _Float16 definitions
 in the public headers.
 This function should at some point be moved in FastMath.

 arm_exponent_f16 raises x to the integer power nb by repeated
 multiplication: the result starts at x and is multiplied by x
 (nb - 1) times. When nb <= 1 the loop body never executes and x is
 returned unchanged (so nb == 0 yields x, not 1).

 x   base value
 nb  integer exponent
 Returns x multiplied by itself (nb - 1) times.
 */
 __STATIC_INLINE float16_t arm_exponent_f16(float16_t x, int32_t nb)
 {
    /* Running product, seeded with x (i.e. x to the power 1). */
    float16_t r = x;
    /* One factor of x is already accounted for in r. */
    nb --;
    while(nb > 0)
    {
        /* Operands are cast to _Float16 so the product is formed in that type. */
        r = (_Float16)r * (_Float16)x;
        nb--;
    }
    return(r);
 }
 /**
 * @brief SVM polynomial prediction
 * @param[in]    S          Pointer to an instance of the polynomial SVM structure.
 * @param[in]    in         Pointer to input vector
 * @param[out]   pResult    Decision value
 * @return none.
 *
 */
 void arm_svm_polynomial_predict_f16(
    const arm_svm_polynomial_instance_f16 *S,
    const float16_t * in,
--- a/Testing/Source/Tests/BIQUADF32.cpp
+++ b/Testing/Source/Tests/BIQUADF32.cpp
@ -92,11 +92,8 @@ a double precision computation.
        float32_t *statep = state.ptr();
 #if !defined(ARM_MATH_NEON) 
        const float32_t *coefsp = coefs.ptr();
-#else
+
        float32_t *coefsp = coefs.ptr();
 #endif
        const float32_t *inputp = inputs.ptr();
        float32_t *outp = output.ptr();
@ -126,13 +123,15 @@ a double precision computation.
 #else
           float32_t *vecCoefsPtr = vecCoefs.ptr();
           // Those Neon coefs must be computed from original coefs
           arm_biquad_cascade_df2T_compute_coefs_f32(3,coefsp,vecCoefsPtr);
           arm_biquad_cascade_df2T_init_f32(&this->Sdf2T,
                    3,
                    vecCoefsPtr,
                    statep);
-           // Those Neon coefs must be computed from original coefs
+           
           arm_biquad_cascade_df2T_compute_coefs_f32(&this->Sdf2T,3,coefsp);
 #endif
           /*
@ -290,13 +289,15 @@ a double precision computation.
 #else
           float32_t *vecCoefsPtr = vecCoefs.ptr();
           // Those Neon coefs must be computed from original coefs
           arm_biquad_cascade_df2T_compute_coefs_f32(numStages,coefsp,vecCoefsPtr);
           arm_biquad_cascade_df2T_init_f32(&this->Sdf2T,
                    numStages,
                    vecCoefsPtr,
                    statep);
-           // Those Neon coefs must be computed from original coefs
+           
           arm_biquad_cascade_df2T_compute_coefs_f32(&this->Sdf2T,numStages,coefsp);
 #endif
           coefsp += numStages * 5;