Updated the indentation and added the optimized var function

pull/39/head
Silfurion 4 years ago
parent c6b18e9e92
commit 433ecc8b22

@ -51,7 +51,7 @@ void arm_dot_prod_f64(
const float64_t * pSrcB, const float64_t * pSrcB,
uint32_t blockSize, uint32_t blockSize,
float64_t * result) float64_t * result)
{ {
uint32_t blkCnt; /* Loop counter */ uint32_t blkCnt; /* Loop counter */
float64_t sum = 0.; /* Temporary return variable */ float64_t sum = 0.; /* Temporary return variable */
float64x2_t sumV ; /* Neon buffer for sum variable */ float64x2_t sumV ; /* Neon buffer for sum variable */
@ -99,7 +99,7 @@ void arm_dot_prod_f64(
/* Store result in destination buffer */ /* Store result in destination buffer */
*result = sum; *result = sum;
} }
#else #else
void arm_dot_prod_f64( void arm_dot_prod_f64(
const float64_t * pSrcA, const float64_t * pSrcA,

@ -30,7 +30,7 @@
/** /**
@ingroup groupFilters @ingroup groupFilters
*/ */
/** /**
@defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure @defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure
@ -117,7 +117,7 @@
where <code>numStages</code> is the number of Biquad stages in the filter; where <code>numStages</code> is the number of Biquad stages in the filter;
<code>pState</code> is the address of the state buffer. <code>pState</code> is the address of the state buffer.
<code>pCoeffs</code> is the address of the coefficient buffer; <code>pCoeffs</code> is the address of the coefficient buffer;
*/ */
/** /**
@addtogroup BiquadCascadeDF2T @addtogroup BiquadCascadeDF2T
@ -335,7 +335,7 @@ void arm_biquad_cascade_df2T_f64(
/* d1 = b1 * x[n] + a1 * y[n] + d2 */ /* d1 = b1 * x[n] + a1 * y[n] + d2 */
/* d2 = b2 * x[n] + a2 * y[n] */ /* d2 = b2 * x[n] + a2 * y[n] */
/* 1 */ /* 1 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -349,7 +349,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 2 */ /* 2 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -362,7 +362,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 3 */ /* 3 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -375,7 +375,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 4 */ /* 4 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -388,7 +388,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 5 */ /* 5 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -401,7 +401,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 6 */ /* 6 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -414,7 +414,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 7 */ /* 7 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -427,7 +427,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 8 */ /* 8 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -440,7 +440,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 9 */ /* 9 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -453,7 +453,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 10 */ /* 10 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -466,7 +466,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 11 */ /* 11 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -479,7 +479,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 12 */ /* 12 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -492,7 +492,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 13 */ /* 13 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -505,7 +505,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 14 */ /* 14 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -518,7 +518,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 15 */ /* 15 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;
@ -531,7 +531,7 @@ void arm_biquad_cascade_df2T_f64(
*pOut++ = acc1; *pOut++ = acc1;
/* 16 */ /* 16 */
Xn1 = *pIn++; Xn1 = *pIn++;
acc1 = b0 * Xn1 + d1; acc1 = b0 * Xn1 + d1;

@ -48,11 +48,11 @@
*/ */
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_status arm_mat_solve_lower_triangular_f64( arm_status arm_mat_solve_lower_triangular_f64(
const arm_matrix_instance_f64 * lt, const arm_matrix_instance_f64 * lt,
const arm_matrix_instance_f64 * a, const arm_matrix_instance_f64 * a,
arm_matrix_instance_f64 * dst) arm_matrix_instance_f64 * dst)
{ {
arm_status status; /* status of matrix inverse */ arm_status status; /* status of matrix inverse */
@ -149,11 +149,11 @@
} }
#else #else
arm_status arm_mat_solve_lower_triangular_f64( arm_status arm_mat_solve_lower_triangular_f64(
const arm_matrix_instance_f64 * lt, const arm_matrix_instance_f64 * lt,
const arm_matrix_instance_f64 * a, const arm_matrix_instance_f64 * a,
arm_matrix_instance_f64 * dst) arm_matrix_instance_f64 * dst)
{ {
arm_status status; /* status of matrix inverse */ arm_status status; /* status of matrix inverse */

@ -48,12 +48,12 @@
*/ */
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_status arm_mat_solve_upper_triangular_f64( arm_status arm_mat_solve_upper_triangular_f64(
const arm_matrix_instance_f64 * ut, const arm_matrix_instance_f64 * ut,
const arm_matrix_instance_f64 * a, const arm_matrix_instance_f64 * a,
arm_matrix_instance_f64 * dst) arm_matrix_instance_f64 * dst)
{ {
arm_status status; /* status of matrix inverse */ arm_status status; /* status of matrix inverse */
#ifdef ARM_MATH_MATRIX_CHECK #ifdef ARM_MATH_MATRIX_CHECK
@ -144,12 +144,12 @@ arm_status status; /* status of matrix inverse */
} }
#else #else
arm_status arm_mat_solve_upper_triangular_f64( arm_status arm_mat_solve_upper_triangular_f64(
const arm_matrix_instance_f64 * ut, const arm_matrix_instance_f64 * ut,
const arm_matrix_instance_f64 * a, const arm_matrix_instance_f64 * a,
arm_matrix_instance_f64 * dst) arm_matrix_instance_f64 * dst)
{ {
arm_status status; /* status of matrix inverse */ arm_status status; /* status of matrix inverse */
#ifdef ARM_MATH_MATRIX_CHECK #ifdef ARM_MATH_MATRIX_CHECK

@ -111,7 +111,7 @@ void arm_mse_f64(
blkCnt = blockSize; blkCnt = blockSize;
#endif #endif
#endif #endif
//#pragma clang loop vectorize(enable) unroll(disable) //#pragma clang loop vectorize(enable) unroll(disable)
while (blkCnt > 0U) while (blkCnt > 0U)
{ {
inA = *pSrcA++; inA = *pSrcA++;

@ -49,6 +49,7 @@ void arm_var_f64(
uint32_t blockSize, uint32_t blockSize,
float64_t * pResult) float64_t * pResult)
{ {
uint32_t blkCnt; /* Loop counter */ uint32_t blkCnt; /* Loop counter */
float64_t sum = 0.; /* Temporary result storage */ float64_t sum = 0.; /* Temporary result storage */
float64_t fSum = 0.; float64_t fSum = 0.;
@ -60,28 +61,29 @@ void arm_var_f64(
*pResult = 0; *pResult = 0;
return; return;
} }
arm_mean_f64(pInput, blockSize, &fMean);
/* Initialize blkCnt with number of samples */ #if defined(ARM_MATH_NEON)
blkCnt = blockSize; float64x2_t fValueV ,fsumV , pInputV , fMeanV;
fsumV = vdupq_n_f64(0.0f);
while (blkCnt > 0U) fMeanV = vdupq_n_f64(fMean);
blkCnt = blockSize >> 1U;
while(blkCnt > 0U)
{ {
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ pInputV = vld1q_f64(pInput);
fValueV = vsubq_f64(pInputV, fMeanV);
sum += *pInput++; fsumV = vmlaq_f64(fsumV, fValueV, fValueV);
pInput += 2 ;
/* Decrement loop counter */
blkCnt--; blkCnt--;
} }
fSum = vaddvq_f64(fsumV);
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ blkCnt = blockSize & 1 ;
fMean = sum / (float64_t) blockSize;
pInput = pSrc;
#else
/* Initialize blkCnt with number of samples */ /* Initialize blkCnt with number of samples */
blkCnt = blockSize; blkCnt = blockSize;
#endif
while (blkCnt > 0U) while (blkCnt > 0U)
{ {
fValue = *pInput++ - fMean; fValue = *pInput++ - fMean;

Loading…
Cancel
Save