CMSIS-DSP: Correction of issue 1217

Wrong initialization code for Neon version of biquad DF2T.
Initialization function was trying to modify a const array.
Added the Neon function to the Doxygen output and made some corrections required by Doxygen.
pull/19/head
Christophe Favergeon 5 years ago
parent 5fac45cc96
commit 2a2f745bd3

@ -1173,10 +1173,17 @@ arm_status arm_fir_decimate_init_f32(
#if defined(ARM_MATH_NEON)
/**
@brief Compute new coefficient arrays for use in vectorized filter (Neon only).
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the original filter coefficients.
@param[out] pComputedCoeffs points to the new computed coefficients for the vectorized version.
@return none
*/
void arm_biquad_cascade_df2T_compute_coefs_f32(
arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages,
const float32_t * pCoeffs);
const float32_t * pCoeffs,
float32_t * pComputedCoeffs);
#endif
/**
* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.

@ -37,78 +37,32 @@
@{
*/
#if defined(ARM_MATH_NEON)
/**
@brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
@param[in,out] S points to an instance of the filter data structure.
@brief Compute new coefficient arrays for use in vectorized filter (Neon only).
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the filter coefficients.
@param[in] pState points to the state buffer.
@param[in] pCoeffs points to the original filter coefficients.
@param[out] pComputedCoeffs points to the new computed coefficients for the vectorized Neon version.
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order
in the not Neon version.
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par Size of coefficient arrays:
pCoeffs has size 5 * numStages
@par
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
For Neon version, this array is bigger. If numstages = 4x + y, then the array has size:
32*x + 5*y
and it must be initialized using the function
arm_biquad_cascade_df2T_compute_coefs_f32 which is taking the
standard array coefficient as parameters.
pComputedCoeffs has size 8 * numStages
But, an array of 8*numstages is a good approximation.
Then, the initialization can be done with:
<pre>
arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, neonCoefs, stateNeon);
arm_biquad_cascade_df2T_compute_coefs_f32(&SNeon,nbCascade,coefs);
</pre>
@par In this example, neonCoefs is a bigger array of size 8 * numStages.
coefs is the standard array:
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
The <code>pState</code> is a pointer to state array.
Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
The state array has a total length of <code>2*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
#if defined(ARM_MATH_NEON)
/*
Must be called after initializing the biquad instance.
pCoeffs has size 5 * nbCascade
Whereas the pCoeffs for the init has size (4*4 + 4*4)* nbCascade
So this pCoeffs is the one which would be used for the not Neon version.
The pCoeffs passed in init is bigger than the one for the not Neon version.
pComputedCoeffs is the array to be used in arm_biquad_cascade_df2T_init_f32.
*/
void arm_biquad_cascade_df2T_compute_coefs_f32(
arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages,
const float32_t * pCoeffs)
const float32_t * pCoeffs,
float32_t * pComputedCoeffs)
{
uint8_t cnt;
float32_t *pDstCoeffs;
float32_t b0[4],b1[4],b2[4],a1[4],a2[4];
pDstCoeffs = (float32_t*)S->pCoeffs;
cnt = numStages >> 2;
while(cnt > 0)
{
@ -123,52 +77,52 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
}
/* Vec 1 */
*pDstCoeffs++ = 0;
*pDstCoeffs++ = b0[1];
*pDstCoeffs++ = b0[2];
*pDstCoeffs++ = b0[3];
*pComputedCoeffs++ = 0;
*pComputedCoeffs++ = b0[1];
*pComputedCoeffs++ = b0[2];
*pComputedCoeffs++ = b0[3];
/* Vec 2 */
*pDstCoeffs++ = 0;
*pDstCoeffs++ = 0;
*pDstCoeffs++ = b0[1] * b0[2];
*pDstCoeffs++ = b0[2] * b0[3];
*pComputedCoeffs++ = 0;
*pComputedCoeffs++ = 0;
*pComputedCoeffs++ = b0[1] * b0[2];
*pComputedCoeffs++ = b0[2] * b0[3];
/* Vec 3 */
*pDstCoeffs++ = 0;
*pDstCoeffs++ = 0;
*pDstCoeffs++ = 0;
*pDstCoeffs++ = b0[1] * b0[2] * b0[3];
*pComputedCoeffs++ = 0;
*pComputedCoeffs++ = 0;
*pComputedCoeffs++ = 0;
*pComputedCoeffs++ = b0[1] * b0[2] * b0[3];
/* Vec 4 */
*pDstCoeffs++ = b0[0];
*pDstCoeffs++ = b0[0] * b0[1];
*pDstCoeffs++ = b0[0] * b0[1] * b0[2];
*pDstCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3];
*pComputedCoeffs++ = b0[0];
*pComputedCoeffs++ = b0[0] * b0[1];
*pComputedCoeffs++ = b0[0] * b0[1] * b0[2];
*pComputedCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3];
/* Vec 5 */
*pDstCoeffs++ = b1[0];
*pDstCoeffs++ = b1[1];
*pDstCoeffs++ = b1[2];
*pDstCoeffs++ = b1[3];
*pComputedCoeffs++ = b1[0];
*pComputedCoeffs++ = b1[1];
*pComputedCoeffs++ = b1[2];
*pComputedCoeffs++ = b1[3];
/* Vec 6 */
*pDstCoeffs++ = b2[0];
*pDstCoeffs++ = b2[1];
*pDstCoeffs++ = b2[2];
*pDstCoeffs++ = b2[3];
*pComputedCoeffs++ = b2[0];
*pComputedCoeffs++ = b2[1];
*pComputedCoeffs++ = b2[2];
*pComputedCoeffs++ = b2[3];
/* Vec 7 */
*pDstCoeffs++ = a1[0];
*pDstCoeffs++ = a1[1];
*pDstCoeffs++ = a1[2];
*pDstCoeffs++ = a1[3];
*pComputedCoeffs++ = a1[0];
*pComputedCoeffs++ = a1[1];
*pComputedCoeffs++ = a1[2];
*pComputedCoeffs++ = a1[3];
/* Vec 8 */
*pDstCoeffs++ = a2[0];
*pDstCoeffs++ = a2[1];
*pDstCoeffs++ = a2[2];
*pDstCoeffs++ = a2[3];
*pComputedCoeffs++ = a2[0];
*pComputedCoeffs++ = a2[1];
*pComputedCoeffs++ = a2[2];
*pComputedCoeffs++ = a2[3];
cnt--;
}
@ -176,17 +130,66 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
cnt = numStages & 0x3;
while(cnt > 0)
{
*pDstCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++;
*pComputedCoeffs++ = *pCoeffs++;
*pComputedCoeffs++ = *pCoeffs++;
*pComputedCoeffs++ = *pCoeffs++;
*pComputedCoeffs++ = *pCoeffs++;
*pComputedCoeffs++ = *pCoeffs++;
cnt--;
}
}
#endif
/**
@brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
@param[in,out] S points to an instance of the filter data structure.
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the filter coefficients.
@param[in] pState points to the state buffer.
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order
in the non-Neon version.
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
For the Neon version, this array is bigger. If numStages = 4x + y (with 0 <= y < 4), then the array has size:
32*x + 5*y
and it must be initialized using the function
arm_biquad_cascade_df2T_compute_coefs_f32, which takes the
standard coefficient array as a parameter.
An array of size 8*numStages is always large enough and can be used as a simple upper bound.
Then, the initialization can be done with:
<pre>
arm_biquad_cascade_df2T_compute_coefs_f32(nbCascade,coefs,computedCoefs);
arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, computedCoefs, stateNeon);
</pre>
@par In this example, computedCoefs is a bigger array of size 8 * numStages.
coefs is the standard array:
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
The <code>pState</code> is a pointer to state array.
Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
The state array has a total length of <code>2*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
void arm_biquad_cascade_df2T_init_f32(
arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages,

@ -28,6 +28,10 @@
#include "dsp/matrix_functions.h"
#if defined(ARM_MATH_NEON)
#define GROUPOFROWS 8
#endif
/**
* @ingroup groupMatrix
*/
@ -54,14 +58,7 @@
* @{
*/
/**
* @brief Floating-point matrix multiplication.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
@ -258,6 +255,14 @@ __STATIC_INLINE arm_status arm_mat_mult_f32_4x4_mve(
}
/**
* @brief Floating-point matrix multiplication.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
arm_status arm_mat_mult_f32(
const arm_matrix_instance_f32 * pSrcA,
const arm_matrix_instance_f32 * pSrcB,
@ -512,9 +517,14 @@ arm_status arm_mat_mult_f32(
#else
#if defined(ARM_MATH_NEON)
#define GROUPOFROWS 8
/**
* @brief Floating-point matrix multiplication.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
arm_status arm_mat_mult_f32(
const arm_matrix_instance_f32 * pSrcA,
const arm_matrix_instance_f32 * pSrcB,
@ -843,6 +853,14 @@ arm_status arm_mat_mult_f32(
return (status);
}
#else
/**
* @brief Floating-point matrix multiplication.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
arm_status arm_mat_mult_f32(
const arm_matrix_instance_f32 * pSrcA,
const arm_matrix_instance_f32 * pSrcB,

@ -33,8 +33,28 @@
#include <limits.h>
#include <math.h>
#if !defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_AUTOVECTORIZE)
/*
_Float16 is not supported in g++ so we avoid putting _Float16 definitions
in the public headers.
This function should at some point be moved in FastMath.
*/
/*
  Raise x to the integer power nb by repeated multiplication.
  x  : base value.
  nb : exponent; for nb <= 1 no multiplication is done and x is returned as is.
  Returns x multiplied by itself (nb - 1) times.
*/
__STATIC_INLINE float16_t arm_exponent_f16(float16_t x, int32_t nb)
{
    float16_t result = x;
    int32_t remaining;

    /* The initial value already accounts for one factor of x. */
    for (remaining = nb - 1; remaining > 0; remaining--)
    {
        result = (_Float16)result * (_Float16)x;
    }

    return (result);
}
#endif
/**
* @addtogroup polysvm
@ -42,6 +62,13 @@
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
/**
* @brief SVM polynomial prediction
* @param[in] S Pointer to an instance of the polynomial SVM structure.
@ -50,12 +77,6 @@
* @return none.
*
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
void arm_svm_polynomial_predict_f16(
const arm_svm_polynomial_instance_f16 *S,
const float16_t * in,
@ -306,26 +327,15 @@ void arm_svm_polynomial_predict_f16(
#else
/*
_Float16 is not supported in g++ so we avoid putting _Float16 definitions
in the public headers.
This function should at some point be moved in FastMath.
/**
* @brief SVM polynomial prediction
* @param[in] S Pointer to an instance of the polynomial SVM structure.
* @param[in] in Pointer to input vector
* @param[out] pResult Decision value
* @return none.
*
*/
/*
  Raise x to the integer power nb by repeated multiplication.
  x  : base value.
  nb : exponent; for nb <= 1 no multiplication is done and x is returned as is.
  Returns x multiplied by itself (nb - 1) times.
*/
__STATIC_INLINE float16_t arm_exponent_f16(float16_t x, int32_t nb)
{
    float16_t result = x;
    int32_t remaining;

    /* The initial value already accounts for one factor of x. */
    for (remaining = nb - 1; remaining > 0; remaining--)
    {
        result = (_Float16)result * (_Float16)x;
    }

    return (result);
}
void arm_svm_polynomial_predict_f16(
const arm_svm_polynomial_instance_f16 *S,
const float16_t * in,

@ -92,11 +92,8 @@ a double precision computation.
float32_t *statep = state.ptr();
#if !defined(ARM_MATH_NEON)
const float32_t *coefsp = coefs.ptr();
#else
float32_t *coefsp = coefs.ptr();
#endif
const float32_t *inputp = inputs.ptr();
float32_t *outp = output.ptr();
@ -126,13 +123,15 @@ a double precision computation.
#else
float32_t *vecCoefsPtr = vecCoefs.ptr();
// Those Neon coefs must be computed from original coefs
arm_biquad_cascade_df2T_compute_coefs_f32(3,coefsp,vecCoefsPtr);
arm_biquad_cascade_df2T_init_f32(&this->Sdf2T,
3,
vecCoefsPtr,
statep);
// Those Neon coefs must be computed from original coefs
arm_biquad_cascade_df2T_compute_coefs_f32(&this->Sdf2T,3,coefsp);
#endif
/*
@ -290,13 +289,15 @@ a double precision computation.
#else
float32_t *vecCoefsPtr = vecCoefs.ptr();
// Those Neon coefs must be computed from original coefs
arm_biquad_cascade_df2T_compute_coefs_f32(numStages,coefsp,vecCoefsPtr);
arm_biquad_cascade_df2T_init_f32(&this->Sdf2T,
numStages,
vecCoefsPtr,
statep);
// Those Neon coefs must be computed from original coefs
arm_biquad_cascade_df2T_compute_coefs_f32(&this->Sdf2T,numStages,coefsp);
#endif
coefsp += numStages * 5;

Loading…
Cancel
Save