CMSIS-DSP: Correction of issue 1217

Wrong initialization code for Neon version of biquad DF2T.
Initialization function was trying to modify a const array.
Added the Neon function to the Doxygen output, plus some corrections required by Doxygen.
pull/19/head
Christophe Favergeon 5 years ago
parent 5fac45cc96
commit 2a2f745bd3

@ -1173,10 +1173,17 @@ arm_status arm_fir_decimate_init_f32(
#if defined(ARM_MATH_NEON) #if defined(ARM_MATH_NEON)
/**
@brief Compute new coefficient arrays for use in vectorized filter (Neon only).
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the original filter coefficients.
@param[out] pComputedCoeffs points to the new computed coefficients for the vectorized version.
@return none
*/
void arm_biquad_cascade_df2T_compute_coefs_f32( void arm_biquad_cascade_df2T_compute_coefs_f32(
arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages, uint8_t numStages,
const float32_t * pCoeffs); const float32_t * pCoeffs,
float32_t * pComputedCoeffs);
#endif #endif
/** /**
* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter. * @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.

@ -37,78 +37,32 @@
@{ @{
*/ */
/**
@brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
@param[in,out] S points to an instance of the filter data structure.
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the filter coefficients.
@param[in] pState points to the state buffer.
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order
in the not Neon version.
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
For Neon version, this array is bigger. If numstages = 4x + y, then the array has size:
32*x + 5*y
and it must be initialized using the function
arm_biquad_cascade_df2T_compute_coefs_f32 which is taking the
standard array coefficient as parameters.
But, an array of 8*numstages is a good approximation.
Then, the initialization can be done with:
<pre>
arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, neonCoefs, stateNeon);
arm_biquad_cascade_df2T_compute_coefs_f32(&SNeon,nbCascade,coefs);
</pre>
@par In this example, neonCoefs is a bigger array of size 8 * numStages.
coefs is the standard array:
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
The <code>pState</code> is a pointer to state array.
Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
The state array has a total length of <code>2*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
#if defined(ARM_MATH_NEON) #if defined(ARM_MATH_NEON)
/* /**
@brief Compute new coefficient arrays for use in vectorized filter (Neon only).
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the original filter coefficients.
@param[out] pComputedCoeffs points to the new computed coefficients for the vectorized Neon version.
@return none
@par Size of coefficient arrays:
pCoeffs has size 5 * numStages
Must be called after initializing the biquad instance. pComputedCoeffs has size 8 * numStages
pCoeffs has size 5 * nbCascade
Whereas the pCoeffs for the init has size (4*4 + 4*4)* nbCascade
So this pCoeffs is the one which would be used for the not Neon version. pComputedCoeffs is the array to be used in arm_biquad_cascade_df2T_init_f32.
The pCoeffs passed in init is bigger than the one for the not Neon version.
*/ */
void arm_biquad_cascade_df2T_compute_coefs_f32( void arm_biquad_cascade_df2T_compute_coefs_f32(
arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages, uint8_t numStages,
const float32_t * pCoeffs) const float32_t * pCoeffs,
float32_t * pComputedCoeffs)
{ {
uint8_t cnt; uint8_t cnt;
float32_t *pDstCoeffs;
float32_t b0[4],b1[4],b2[4],a1[4],a2[4]; float32_t b0[4],b1[4],b2[4],a1[4],a2[4];
pDstCoeffs = (float32_t*)S->pCoeffs;
cnt = numStages >> 2; cnt = numStages >> 2;
while(cnt > 0) while(cnt > 0)
{ {
@ -123,52 +77,52 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
} }
/* Vec 1 */ /* Vec 1 */
*pDstCoeffs++ = 0; *pComputedCoeffs++ = 0;
*pDstCoeffs++ = b0[1]; *pComputedCoeffs++ = b0[1];
*pDstCoeffs++ = b0[2]; *pComputedCoeffs++ = b0[2];
*pDstCoeffs++ = b0[3]; *pComputedCoeffs++ = b0[3];
/* Vec 2 */ /* Vec 2 */
*pDstCoeffs++ = 0; *pComputedCoeffs++ = 0;
*pDstCoeffs++ = 0; *pComputedCoeffs++ = 0;
*pDstCoeffs++ = b0[1] * b0[2]; *pComputedCoeffs++ = b0[1] * b0[2];
*pDstCoeffs++ = b0[2] * b0[3]; *pComputedCoeffs++ = b0[2] * b0[3];
/* Vec 3 */ /* Vec 3 */
*pDstCoeffs++ = 0; *pComputedCoeffs++ = 0;
*pDstCoeffs++ = 0; *pComputedCoeffs++ = 0;
*pDstCoeffs++ = 0; *pComputedCoeffs++ = 0;
*pDstCoeffs++ = b0[1] * b0[2] * b0[3]; *pComputedCoeffs++ = b0[1] * b0[2] * b0[3];
/* Vec 4 */ /* Vec 4 */
*pDstCoeffs++ = b0[0]; *pComputedCoeffs++ = b0[0];
*pDstCoeffs++ = b0[0] * b0[1]; *pComputedCoeffs++ = b0[0] * b0[1];
*pDstCoeffs++ = b0[0] * b0[1] * b0[2]; *pComputedCoeffs++ = b0[0] * b0[1] * b0[2];
*pDstCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3]; *pComputedCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3];
/* Vec 5 */ /* Vec 5 */
*pDstCoeffs++ = b1[0]; *pComputedCoeffs++ = b1[0];
*pDstCoeffs++ = b1[1]; *pComputedCoeffs++ = b1[1];
*pDstCoeffs++ = b1[2]; *pComputedCoeffs++ = b1[2];
*pDstCoeffs++ = b1[3]; *pComputedCoeffs++ = b1[3];
/* Vec 6 */ /* Vec 6 */
*pDstCoeffs++ = b2[0]; *pComputedCoeffs++ = b2[0];
*pDstCoeffs++ = b2[1]; *pComputedCoeffs++ = b2[1];
*pDstCoeffs++ = b2[2]; *pComputedCoeffs++ = b2[2];
*pDstCoeffs++ = b2[3]; *pComputedCoeffs++ = b2[3];
/* Vec 7 */ /* Vec 7 */
*pDstCoeffs++ = a1[0]; *pComputedCoeffs++ = a1[0];
*pDstCoeffs++ = a1[1]; *pComputedCoeffs++ = a1[1];
*pDstCoeffs++ = a1[2]; *pComputedCoeffs++ = a1[2];
*pDstCoeffs++ = a1[3]; *pComputedCoeffs++ = a1[3];
/* Vec 8 */ /* Vec 8 */
*pDstCoeffs++ = a2[0]; *pComputedCoeffs++ = a2[0];
*pDstCoeffs++ = a2[1]; *pComputedCoeffs++ = a2[1];
*pDstCoeffs++ = a2[2]; *pComputedCoeffs++ = a2[2];
*pDstCoeffs++ = a2[3]; *pComputedCoeffs++ = a2[3];
cnt--; cnt--;
} }
@ -176,17 +130,66 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
cnt = numStages & 0x3; cnt = numStages & 0x3;
while(cnt > 0) while(cnt > 0)
{ {
*pDstCoeffs++ = *pCoeffs++; *pComputedCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++; *pComputedCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++; *pComputedCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++; *pComputedCoeffs++ = *pCoeffs++;
*pDstCoeffs++ = *pCoeffs++; *pComputedCoeffs++ = *pCoeffs++;
cnt--; cnt--;
} }
} }
#endif #endif
/**
@brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
@param[in,out] S points to an instance of the filter data structure.
@param[in] numStages number of 2nd order stages in the filter.
@param[in] pCoeffs points to the filter coefficients.
@param[in] pState points to the state buffer.
@return none
@par Coefficient and State Ordering
The coefficients are stored in the array <code>pCoeffs</code> in the following order
in the non-Neon version.
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
<code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
For the Neon version, this array is bigger. If numStages = 4x + y (with 0 <= y < 4), then the array has size:
32*x + 5*y
and it must be initialized using the function
arm_biquad_cascade_df2T_compute_coefs_f32, which takes the
standard coefficient array as a parameter.
An array of size 8*numStages is always large enough and is a convenient upper bound.
Then, the initialization can be done with:
<pre>
arm_biquad_cascade_df2T_compute_coefs_f32(nbCascade,coefs,computedCoefs);
arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, computedCoefs, stateNeon);
</pre>
@par In this example, computedCoefs is a bigger array of size 8 * numStages.
coefs is the standard array:
<pre>
{b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
</pre>
@par
The <code>pState</code> is a pointer to state array.
Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code>.
The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
The state array has a total length of <code>2*numStages</code> values.
The state variables are updated after each block of data is processed; the coefficients are untouched.
*/
void arm_biquad_cascade_df2T_init_f32( void arm_biquad_cascade_df2T_init_f32(
arm_biquad_cascade_df2T_instance_f32 * S, arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages, uint8_t numStages,

@ -28,6 +28,10 @@
#include "dsp/matrix_functions.h" #include "dsp/matrix_functions.h"
#if defined(ARM_MATH_NEON)
#define GROUPOFROWS 8
#endif
/** /**
* @ingroup groupMatrix * @ingroup groupMatrix
*/ */
@ -54,14 +58,7 @@
* @{ * @{
*/ */
/**
* @brief Floating-point matrix multiplication.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
@ -258,6 +255,14 @@ __STATIC_INLINE arm_status arm_mat_mult_f32_4x4_mve(
} }
/**
* @brief Floating-point matrix multiplication.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
arm_status arm_mat_mult_f32( arm_status arm_mat_mult_f32(
const arm_matrix_instance_f32 * pSrcA, const arm_matrix_instance_f32 * pSrcA,
const arm_matrix_instance_f32 * pSrcB, const arm_matrix_instance_f32 * pSrcB,
@ -512,9 +517,14 @@ arm_status arm_mat_mult_f32(
#else #else
#if defined(ARM_MATH_NEON) #if defined(ARM_MATH_NEON)
/**
#define GROUPOFROWS 8 * @brief Floating-point matrix multiplication.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
arm_status arm_mat_mult_f32( arm_status arm_mat_mult_f32(
const arm_matrix_instance_f32 * pSrcA, const arm_matrix_instance_f32 * pSrcA,
const arm_matrix_instance_f32 * pSrcB, const arm_matrix_instance_f32 * pSrcB,
@ -843,6 +853,14 @@ arm_status arm_mat_mult_f32(
return (status); return (status);
} }
#else #else
/**
* @brief Floating-point matrix multiplication.
* @param[in] *pSrcA points to the first input matrix structure
* @param[in] *pSrcB points to the second input matrix structure
* @param[out] *pDst points to output matrix structure
* @return The function returns either
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/
arm_status arm_mat_mult_f32( arm_status arm_mat_mult_f32(
const arm_matrix_instance_f32 * pSrcA, const arm_matrix_instance_f32 * pSrcA,
const arm_matrix_instance_f32 * pSrcB, const arm_matrix_instance_f32 * pSrcB,

@ -33,8 +33,28 @@
#include <limits.h> #include <limits.h>
#include <math.h> #include <math.h>
#if !defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_AUTOVECTORIZE)
/*
_Float16 is not supported in g++ so we avoid putting _Float16 definitions
in the public headers.
This function should at some point be moved into FastMath.

Raises x to the integer power nb by repeated multiplication: the result
starts at x and is multiplied by x (nb - 1) times, each product being
computed through _Float16 casts.
For nb <= 1 the loop does not run and x is returned unchanged.
*/
__STATIC_INLINE float16_t arm_exponent_f16(float16_t x, int32_t nb)
{
float16_t r = x;
/* r already holds x^1, so one fewer multiplication is needed. */
nb --;
while(nb > 0)
{
r = (_Float16)r * (_Float16)x;
nb--;
}
return(r);
}
#endif
/** /**
* @addtogroup polysvm * @addtogroup polysvm
@ -42,6 +62,13 @@
*/ */
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
/** /**
* @brief SVM polynomial prediction * @brief SVM polynomial prediction
* @param[in] S Pointer to an instance of the polynomial SVM structure. * @param[in] S Pointer to an instance of the polynomial SVM structure.
@ -50,12 +77,6 @@
* @return none. * @return none.
* *
*/ */
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
void arm_svm_polynomial_predict_f16( void arm_svm_polynomial_predict_f16(
const arm_svm_polynomial_instance_f16 *S, const arm_svm_polynomial_instance_f16 *S,
const float16_t * in, const float16_t * in,
@ -306,26 +327,15 @@ void arm_svm_polynomial_predict_f16(
#else #else
/*
_Float16 is not supported in g++ so we avoid putting _Float16 definitions
in the public headers.
This function should at some point be moved into FastMath.

Raises x to the integer power nb by repeated multiplication: the result
starts at x and is multiplied by x (nb - 1) times, each product being
computed through _Float16 casts.
For nb <= 1 the loop does not run and x is returned unchanged.
*/
__STATIC_INLINE float16_t arm_exponent_f16(float16_t x, int32_t nb)
{
float16_t r = x;
/* r already holds x^1, so one fewer multiplication is needed. */
nb --;
while(nb > 0)
{
r = (_Float16)r * (_Float16)x;
nb--;
}
return(r);
}
/**
* @brief SVM polynomial prediction
* @param[in] S Pointer to an instance of the polynomial SVM structure.
* @param[in] in Pointer to input vector
* @param[out] pResult Decision value
* @return none.
*
*/
void arm_svm_polynomial_predict_f16( void arm_svm_polynomial_predict_f16(
const arm_svm_polynomial_instance_f16 *S, const arm_svm_polynomial_instance_f16 *S,
const float16_t * in, const float16_t * in,

@ -92,11 +92,8 @@ a double precision computation.
float32_t *statep = state.ptr(); float32_t *statep = state.ptr();
#if !defined(ARM_MATH_NEON)
const float32_t *coefsp = coefs.ptr(); const float32_t *coefsp = coefs.ptr();
#else
float32_t *coefsp = coefs.ptr();
#endif
const float32_t *inputp = inputs.ptr(); const float32_t *inputp = inputs.ptr();
float32_t *outp = output.ptr(); float32_t *outp = output.ptr();
@ -126,13 +123,15 @@ a double precision computation.
#else #else
float32_t *vecCoefsPtr = vecCoefs.ptr(); float32_t *vecCoefsPtr = vecCoefs.ptr();
// Those Neon coefs must be computed from original coefs
arm_biquad_cascade_df2T_compute_coefs_f32(3,coefsp,vecCoefsPtr);
arm_biquad_cascade_df2T_init_f32(&this->Sdf2T, arm_biquad_cascade_df2T_init_f32(&this->Sdf2T,
3, 3,
vecCoefsPtr, vecCoefsPtr,
statep); statep);
// Those Neon coefs must be computed from original coefs
arm_biquad_cascade_df2T_compute_coefs_f32(&this->Sdf2T,3,coefsp);
#endif #endif
/* /*
@ -290,13 +289,15 @@ a double precision computation.
#else #else
float32_t *vecCoefsPtr = vecCoefs.ptr(); float32_t *vecCoefsPtr = vecCoefs.ptr();
// Those Neon coefs must be computed from original coefs
arm_biquad_cascade_df2T_compute_coefs_f32(numStages,coefsp,vecCoefsPtr);
arm_biquad_cascade_df2T_init_f32(&this->Sdf2T, arm_biquad_cascade_df2T_init_f32(&this->Sdf2T,
numStages, numStages,
vecCoefsPtr, vecCoefsPtr,
statep); statep);
// Those Neon coefs must be computed from original coefs
arm_biquad_cascade_df2T_compute_coefs_f32(&this->Sdf2T,numStages,coefsp);
#endif #endif
coefsp += numStages * 5; coefsp += numStages * 5;

Loading…
Cancel
Save