diff --git a/Include/dsp/filtering_functions.h b/Include/dsp/filtering_functions.h
index ef7f2dd5..0d2f9ce8 100755
--- a/Include/dsp/filtering_functions.h
+++ b/Include/dsp/filtering_functions.h
@@ -1173,10 +1173,17 @@ arm_status arm_fir_decimate_init_f32(
#if defined(ARM_MATH_NEON)
+/**
+ @brief Compute new coefficient arrays for use in vectorized filter (Neon only).
+ @param[in] numStages number of 2nd order stages in the filter.
+ @param[in] pCoeffs points to the original filter coefficients.
+ @param[out] pComputedCoeffs points to the buffer that receives the coefficients computed for the vectorized version.
+ @return none
+*/
void arm_biquad_cascade_df2T_compute_coefs_f32(
- arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages,
- const float32_t * pCoeffs);
+ const float32_t * pCoeffs,
+ float32_t * pComputedCoeffs);
#endif
/**
* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
diff --git a/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c b/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
index 27d1eef8..e3b350c3 100644
--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
@@ -37,78 +37,32 @@
@{
*/
-/**
- @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
- @param[in,out] S points to an instance of the filter data structure.
- @param[in] numStages number of 2nd order stages in the filter.
- @param[in] pCoeffs points to the filter coefficients.
- @param[in] pState points to the state buffer.
- @return none
- @par Coefficient and State Ordering
- The coefficients are stored in the array pCoeffs in the following order
- in the not Neon version.
-
- {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
-
-
- @par
- where b1x and a1x are the coefficients for the first stage,
- b2x and a2x are the coefficients for the second stage,
- and so on. The pCoeffs array contains a total of 5*numStages values.
-
- For Neon version, this array is bigger. If numstages = 4x + y, then the array has size:
- 32*x + 5*y
- and it must be initialized using the function
- arm_biquad_cascade_df2T_compute_coefs_f32 which is taking the
- standard array coefficient as parameters.
-
- But, an array of 8*numstages is a good approximation.
-
- Then, the initialization can be done with:
- <pre>
- arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, neonCoefs, stateNeon);
- arm_biquad_cascade_df2T_compute_coefs_f32(&SNeon,nbCascade,coefs);
- </pre>
-
- @par In this example, neonCoefs is a bigger array of size 8 * numStages.
- coefs is the standard array:
-
- <pre>
- {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
- </pre>
-
-
- @par
- The pState is a pointer to state array.
- Each Biquad stage has 2 state variables d1, and d2.
- The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
- The state array has a total length of 2*numStages values.
- The state variables are updated after each block of data is processed; the coefficients are untouched.
- */
#if defined(ARM_MATH_NEON)
-/*
+/**
+ @brief Compute new coefficient arrays for use in vectorized filter (Neon only).
+ @param[in] numStages number of 2nd order stages in the filter.
+ @param[in] pCoeffs points to the original filter coefficients.
+ @param[out] pComputedCoeffs points to the buffer that receives the coefficients computed for the vectorized Neon version.
+ @return none
+
+ @par Size of coefficient arrays:
+ pCoeffs has size 5 * numStages
-Must be called after initializing the biquad instance.
-pCoeffs has size 5 * nbCascade
-Whereas the pCoeffs for the init has size (4*4 + 4*4)* nbCascade
+ pComputedCoeffs must hold 32 * (numStages / 4) + 5 * (numStages % 4) values; 8 * numStages is always sufficient
-So this pCoeffs is the one which would be used for the not Neon version.
-The pCoeffs passed in init is bigger than the one for the not Neon version.
+ pComputedCoeffs is the array to be used in arm_biquad_cascade_df2T_init_f32.
*/
void arm_biquad_cascade_df2T_compute_coefs_f32(
- arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages,
- const float32_t * pCoeffs)
+ const float32_t * pCoeffs,
+ float32_t * pComputedCoeffs)
{
uint8_t cnt;
- float32_t *pDstCoeffs;
float32_t b0[4],b1[4],b2[4],a1[4],a2[4];
- pDstCoeffs = (float32_t*)S->pCoeffs;
-
cnt = numStages >> 2;
while(cnt > 0)
{
@@ -123,52 +77,52 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
}
/* Vec 1 */
- *pDstCoeffs++ = 0;
- *pDstCoeffs++ = b0[1];
- *pDstCoeffs++ = b0[2];
- *pDstCoeffs++ = b0[3];
+ *pComputedCoeffs++ = 0;
+ *pComputedCoeffs++ = b0[1];
+ *pComputedCoeffs++ = b0[2];
+ *pComputedCoeffs++ = b0[3];
/* Vec 2 */
- *pDstCoeffs++ = 0;
- *pDstCoeffs++ = 0;
- *pDstCoeffs++ = b0[1] * b0[2];
- *pDstCoeffs++ = b0[2] * b0[3];
+ *pComputedCoeffs++ = 0;
+ *pComputedCoeffs++ = 0;
+ *pComputedCoeffs++ = b0[1] * b0[2];
+ *pComputedCoeffs++ = b0[2] * b0[3];
/* Vec 3 */
- *pDstCoeffs++ = 0;
- *pDstCoeffs++ = 0;
- *pDstCoeffs++ = 0;
- *pDstCoeffs++ = b0[1] * b0[2] * b0[3];
+ *pComputedCoeffs++ = 0;
+ *pComputedCoeffs++ = 0;
+ *pComputedCoeffs++ = 0;
+ *pComputedCoeffs++ = b0[1] * b0[2] * b0[3];
/* Vec 4 */
- *pDstCoeffs++ = b0[0];
- *pDstCoeffs++ = b0[0] * b0[1];
- *pDstCoeffs++ = b0[0] * b0[1] * b0[2];
- *pDstCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3];
+ *pComputedCoeffs++ = b0[0];
+ *pComputedCoeffs++ = b0[0] * b0[1];
+ *pComputedCoeffs++ = b0[0] * b0[1] * b0[2];
+ *pComputedCoeffs++ = b0[0] * b0[1] * b0[2] * b0[3];
/* Vec 5 */
- *pDstCoeffs++ = b1[0];
- *pDstCoeffs++ = b1[1];
- *pDstCoeffs++ = b1[2];
- *pDstCoeffs++ = b1[3];
+ *pComputedCoeffs++ = b1[0];
+ *pComputedCoeffs++ = b1[1];
+ *pComputedCoeffs++ = b1[2];
+ *pComputedCoeffs++ = b1[3];
/* Vec 6 */
- *pDstCoeffs++ = b2[0];
- *pDstCoeffs++ = b2[1];
- *pDstCoeffs++ = b2[2];
- *pDstCoeffs++ = b2[3];
+ *pComputedCoeffs++ = b2[0];
+ *pComputedCoeffs++ = b2[1];
+ *pComputedCoeffs++ = b2[2];
+ *pComputedCoeffs++ = b2[3];
/* Vec 7 */
- *pDstCoeffs++ = a1[0];
- *pDstCoeffs++ = a1[1];
- *pDstCoeffs++ = a1[2];
- *pDstCoeffs++ = a1[3];
+ *pComputedCoeffs++ = a1[0];
+ *pComputedCoeffs++ = a1[1];
+ *pComputedCoeffs++ = a1[2];
+ *pComputedCoeffs++ = a1[3];
/* Vec 8 */
- *pDstCoeffs++ = a2[0];
- *pDstCoeffs++ = a2[1];
- *pDstCoeffs++ = a2[2];
- *pDstCoeffs++ = a2[3];
+ *pComputedCoeffs++ = a2[0];
+ *pComputedCoeffs++ = a2[1];
+ *pComputedCoeffs++ = a2[2];
+ *pComputedCoeffs++ = a2[3];
cnt--;
}
@@ -176,17 +130,66 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
cnt = numStages & 0x3;
while(cnt > 0)
{
- *pDstCoeffs++ = *pCoeffs++;
- *pDstCoeffs++ = *pCoeffs++;
- *pDstCoeffs++ = *pCoeffs++;
- *pDstCoeffs++ = *pCoeffs++;
- *pDstCoeffs++ = *pCoeffs++;
+ *pComputedCoeffs++ = *pCoeffs++;
+ *pComputedCoeffs++ = *pCoeffs++;
+ *pComputedCoeffs++ = *pCoeffs++;
+ *pComputedCoeffs++ = *pCoeffs++;
+ *pComputedCoeffs++ = *pCoeffs++;
cnt--;
}
}
#endif
+/**
+ @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
+ @param[in,out] S points to an instance of the filter data structure.
+ @param[in] numStages number of 2nd order stages in the filter.
+ @param[in] pCoeffs points to the filter coefficients.
+ @param[in] pState points to the state buffer.
+ @return none
+
+ @par Coefficient and State Ordering
+ The coefficients are stored in the array pCoeffs in the following order
+ in the non-Neon version.
+
+ {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
+
+
+ @par
+ where b1x and a1x are the coefficients for the first stage,
+ b2x and a2x are the coefficients for the second stage,
+ and so on. The pCoeffs array contains a total of 5*numStages values.
+
+ For the Neon version, this array is bigger. If numStages = 4x + y (with 0 <= y < 4), then the array has size:
+ 32*x + 5*y
+ and it must be initialized using the function
+ arm_biquad_cascade_df2T_compute_coefs_f32, which takes the
+ standard coefficient array as input.
+
+ An array of 8*numStages values is always large enough.
+
+ Then, the initialization can be done with:
+ <pre>
+ arm_biquad_cascade_df2T_compute_coefs_f32(nbCascade,coefs,computedCoefs);
+ arm_biquad_cascade_df2T_init_f32(&SNeon, nbCascade, computedCoefs, stateNeon);
+ </pre>
+
+ @par In this example, computedCoefs is a bigger array of size 8 * numStages.
+ coefs is the standard array:
+
+ <pre>
+ {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
+ </pre>
+
+
+ @par
+ The pState is a pointer to state array.
+ Each Biquad stage has 2 state variables d1, and d2.
+ The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
+ The state array has a total length of 2*numStages values.
+ The state variables are updated after each block of data is processed; the coefficients are untouched.
+ */
void arm_biquad_cascade_df2T_init_f32(
arm_biquad_cascade_df2T_instance_f32 * S,
uint8_t numStages,
diff --git a/Source/MatrixFunctions/arm_mat_mult_f32.c b/Source/MatrixFunctions/arm_mat_mult_f32.c
index 54481187..d1fd9eac 100644
--- a/Source/MatrixFunctions/arm_mat_mult_f32.c
+++ b/Source/MatrixFunctions/arm_mat_mult_f32.c
@@ -28,6 +28,10 @@
#include "dsp/matrix_functions.h"
+#if defined(ARM_MATH_NEON)
+#define GROUPOFROWS 8
+#endif
+
/**
* @ingroup groupMatrix
*/
@@ -54,14 +58,7 @@
* @{
*/
-/**
- * @brief Floating-point matrix multiplication.
- * @param[in] *pSrcA points to the first input matrix structure
- * @param[in] *pSrcB points to the second input matrix structure
- * @param[out] *pDst points to output matrix structure
- * @return The function returns either
- * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
- */
+
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
@@ -258,6 +255,14 @@ __STATIC_INLINE arm_status arm_mat_mult_f32_4x4_mve(
}
+/**
+ * @brief Floating-point matrix multiplication.
+ * @param[in] pSrcA points to the first input matrix structure
+ * @param[in] pSrcB points to the second input matrix structure
+ * @param[out] pDst points to output matrix structure
+ * @return The function returns either
+ * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+ */
arm_status arm_mat_mult_f32(
const arm_matrix_instance_f32 * pSrcA,
const arm_matrix_instance_f32 * pSrcB,
@@ -512,9 +517,14 @@ arm_status arm_mat_mult_f32(
#else
#if defined(ARM_MATH_NEON)
-
-#define GROUPOFROWS 8
-
+/**
+ * @brief Floating-point matrix multiplication.
+ * @param[in] pSrcA points to the first input matrix structure
+ * @param[in] pSrcB points to the second input matrix structure
+ * @param[out] pDst points to output matrix structure
+ * @return The function returns either
+ * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+ */
arm_status arm_mat_mult_f32(
const arm_matrix_instance_f32 * pSrcA,
const arm_matrix_instance_f32 * pSrcB,
@@ -843,6 +853,14 @@ arm_status arm_mat_mult_f32(
return (status);
}
#else
+/**
+ * @brief Floating-point matrix multiplication.
+ * @param[in] pSrcA points to the first input matrix structure
+ * @param[in] pSrcB points to the second input matrix structure
+ * @param[out] pDst points to output matrix structure
+ * @return The function returns either
+ * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+ */
arm_status arm_mat_mult_f32(
const arm_matrix_instance_f32 * pSrcA,
const arm_matrix_instance_f32 * pSrcB,
diff --git a/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c b/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
index 46bc689f..724f286d 100755
--- a/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
+++ b/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c
@@ -33,8 +33,28 @@
#include