Updated the indentation and added the optimized var function

4 years ago · 433ecc8b22
parent c6b18e9e92
commit 433ecc8b22
26 changed files with 2557 additions and 2555 deletions
--- a/Source/BasicMathFunctions/arm_dot_prod_f64.c
+++ b/Source/BasicMathFunctions/arm_dot_prod_f64.c
@ -51,7 +51,7 @@ void arm_dot_prod_f64(
 	const float64_t * pSrcB,
 	uint32_t blockSize,
 	float64_t * result)
-    {
+{
 	uint32_t blkCnt;                               /* Loop counter */
 	float64_t sum = 0.;                            /* Temporary return variable */
 	float64x2_t sumV ;                              /* Neon buffer for sum variable */
@ -99,7 +99,7 @@ void arm_dot_prod_f64(
 	
 	/* Store result in destination buffer */
 	*result = sum;
-    }
+}
 #else
 void arm_dot_prod_f64(
 	const float64_t * pSrcA,
--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f64.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f64.c
@ -30,7 +30,7 @@

 /**
 @ingroup groupFilters
-*/
+ */

 /**
 @defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure
@ -117,7 +117,7 @@
 where <code>numStages</code> is the number of Biquad stages in the filter;
 <code>pState</code> is the address of the state buffer.
 <code>pCoeffs</code> is the address of the coefficient buffer;
-*/
+ */

 /**
 @addtogroup BiquadCascadeDF2T
@ -335,7 +335,7 @@ void arm_biquad_cascade_df2T_f64(
 			/* d1 = b1 * x[n] + a1 * y[n] + d2 */
 			/* d2 = b2 * x[n] + a2 * y[n] */
 			
-/*  1 */
+			/*  1 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -349,7 +349,7 @@ void arm_biquad_cascade_df2T_f64(
 			*pOut++ = acc1;
 			
 			
-/*  2 */
+			/*  2 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -362,7 +362,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/*  3 */
+			/*  3 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -375,7 +375,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/*  4 */
+			/*  4 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -388,7 +388,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/*  5 */
+			/*  5 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -401,7 +401,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/*  6 */
+			/*  6 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -414,7 +414,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/*  7 */
+			/*  7 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -427,7 +427,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/*  8 */
+			/*  8 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -440,7 +440,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/*  9 */
+			/*  9 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -453,7 +453,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/* 10 */
+			/* 10 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -466,7 +466,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/* 11 */
+			/* 11 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -479,7 +479,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/* 12 */
+			/* 12 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -492,7 +492,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/* 13 */
+			/* 13 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -505,7 +505,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/* 14 */
+			/* 14 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -518,7 +518,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/* 15 */
+			/* 15 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
@ -531,7 +531,7 @@ void arm_biquad_cascade_df2T_f64(
 			
 			*pOut++ = acc1;
 			
-/* 16 */
+			/* 16 */
 			Xn1 = *pIn++;
 			
 			acc1 = b0 * Xn1 + d1;
--- a/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c
+++ b/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c
@ -48,11 +48,11 @@
   */

 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
-  arm_status arm_mat_solve_lower_triangular_f64(
+arm_status arm_mat_solve_lower_triangular_f64(
 	const arm_matrix_instance_f64 * lt,
 	const arm_matrix_instance_f64 * a,
 	arm_matrix_instance_f64 * dst)
-  {
+{
 	arm_status status;                             /* status of matrix inverse */
 	
 	
@ -149,11 +149,11 @@
 }

 #else
-  arm_status arm_mat_solve_lower_triangular_f64(
+arm_status arm_mat_solve_lower_triangular_f64(
 	const arm_matrix_instance_f64 * lt,
 	const arm_matrix_instance_f64 * a,
 	arm_matrix_instance_f64 * dst)
-  {
+{
 	arm_status status;                             /* status of matrix inverse */
 	
 	
--- a/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c
+++ b/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c
@ -48,12 +48,12 @@
  */

 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
-  arm_status arm_mat_solve_upper_triangular_f64(
+arm_status arm_mat_solve_upper_triangular_f64(
 	const arm_matrix_instance_f64 * ut,
 	const arm_matrix_instance_f64 * a,
 	arm_matrix_instance_f64 * dst)
-  {
-arm_status status;                             /* status of matrix inverse */
+{
+	arm_status status;                             /* status of matrix inverse */
 	
 	
 #ifdef ARM_MATH_MATRIX_CHECK
@ -144,12 +144,12 @@ arm_status status;                             /* status of matrix inverse */
 }

 #else
-  arm_status arm_mat_solve_upper_triangular_f64(
+arm_status arm_mat_solve_upper_triangular_f64(
 	const arm_matrix_instance_f64 * ut,
 	const arm_matrix_instance_f64 * a,
 	arm_matrix_instance_f64 * dst)
-  {
-arm_status status;                             /* status of matrix inverse */
+{
+	arm_status status;                             /* status of matrix inverse */
 	
 	
 #ifdef ARM_MATH_MATRIX_CHECK
--- a/Source/StatisticsFunctions/arm_mse_f64.c
+++ b/Source/StatisticsFunctions/arm_mse_f64.c
@ -111,7 +111,7 @@ void arm_mse_f64(
 	blkCnt = blockSize;
 #endif
 #endif
-//#pragma clang loop vectorize(enable) unroll(disable)
+	//#pragma clang loop vectorize(enable) unroll(disable)
 	while (blkCnt > 0U)
 	{
 		inA = *pSrcA++;
--- a/Source/StatisticsFunctions/arm_var_f64.c
+++ b/Source/StatisticsFunctions/arm_var_f64.c
@ -49,6 +49,7 @@ void arm_var_f64(
 	uint32_t blockSize,
 	float64_t * pResult)
 {
+	
 	uint32_t blkCnt;                               /* Loop counter */
 	float64_t sum = 0.;                          /* Temporary result storage */
 	float64_t fSum = 0.;
@ -60,28 +61,29 @@ void arm_var_f64(
 		*pResult = 0;
 		return;
 	}
-
-  /* Initialize blkCnt with number of samples */
-  blkCnt = blockSize;
-
-  while (blkCnt > 0U)
+	arm_mean_f64(pInput, blockSize, &fMean);
+#if defined(ARM_MATH_NEON)
+	float64x2_t fValueV ,fsumV , pInputV , fMeanV;
+	fsumV = vdupq_n_f64(0.0f);
+	fMeanV = vdupq_n_f64(fMean);
+	blkCnt = blockSize >> 1U;
+	
+	while(blkCnt > 0U)
 	{
-    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-
-    sum += *pInput++;
-
-    /* Decrement loop counter */
+		pInputV = vld1q_f64(pInput);
+		fValueV = vsubq_f64(pInputV, fMeanV);
+		fsumV = vmlaq_f64(fsumV, fValueV, fValueV);
+		pInput += 2 ;
 		blkCnt--;
 	}
+	fSum = vaddvq_f64(fsumV);
 	
-  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
-  fMean = sum / (float64_t) blockSize;
-
-  pInput = pSrc;
+	blkCnt = blockSize & 1 ;
 	
+#else
 	/* Initialize blkCnt with number of samples */
 	blkCnt = blockSize;
-
+#endif
 	while (blkCnt > 0U)
 	{
 		fValue = *pInput++ - fMean;