Updated the indentation and added the optimized var function

4 years ago · 433ecc8b22
parent c6b18e9e92
commit 433ecc8b22
26 changed files with 2557 additions and 2555 deletions
--- a/Source/BasicMathFunctions/arm_dot_prod_f64.c
+++ b/Source/BasicMathFunctions/arm_dot_prod_f64.c
@ -47,85 +47,85 @@
 */
 #if defined(ARM_MATH_NEON)
 void arm_dot_prod_f64(
-    const float64_t * pSrcA,
-    const float64_t * pSrcB,
-        uint32_t blockSize,
-        float64_t * result)
-    {
-    uint32_t blkCnt;                               /* Loop counter */
-    float64_t sum = 0.;                            /* Temporary return variable */
-    float64x2_t sumV ;                              /* Neon buffer for sum variable */
-    
-    /* Neon Buffer Initialisation */
-    sumV = vsetq_lane_f64(0.0f, sumV, 0);
-    sumV = vsetq_lane_f64(0.0f, sumV, 1);
-    
-    /* Neon Buffer for sources */
-    float64x2_t pSrcAV;
-    float64x2_t pSrcBV;
-
-    /* Initialize blkCnt with number of samples */
-    blkCnt = blockSize >> 1U;
-
-    while (blkCnt > 0U)
-    {
-    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
-        
-    /* Load source value in Neon Buffer */
-      pSrcAV = vld1q_f64(pSrcA);
-      pSrcBV = vld1q_f64(pSrcB);
-    /* Calculate dot product and store result in a temporary buffer. */
-      sumV = vmlaq_f64(sumV, pSrcAV, pSrcBV);
-      
-      pSrcA+=2;
-      pSrcB+=2;
-    /* Decrement loop counter */
-      blkCnt--;
-    }
-    /* Sum both 64 bits part in the float64x2 */
-    sum = vaddvq_f64(sumV);
-
-    
-    /* Tail */
-    blkCnt = blockSize & 1 ;
-
-    while(blkCnt > 0U)
-    {
-        sum += (*pSrcA++) * (*pSrcB++);
-
-        /* Decrement loop counter */
-        blkCnt--;
-    }
-
-    /* Store result in destination buffer */
-    *result = sum;
-    }
+	const float64_t * pSrcA,
+	const float64_t * pSrcB,
+	uint32_t blockSize,
+	float64_t * result)
+{
+	uint32_t blkCnt;                               /* Loop counter */
+	float64_t sum = 0.;                            /* Temporary return variable */
+	float64x2_t sumV ;                              /* Neon buffer for sum variable */
+	
+	/* Neon Buffer Initialisation */
+	sumV = vsetq_lane_f64(0.0f, sumV, 0);
+	sumV = vsetq_lane_f64(0.0f, sumV, 1);
+	
+	/* Neon Buffer for sources */
+	float64x2_t pSrcAV;
+	float64x2_t pSrcBV;
+	
+	/* Initialize blkCnt with number of samples */
+	blkCnt = blockSize >> 1U;
+	
+	while (blkCnt > 0U)
+	{
+		/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+		
+		/* Load source value in Neon Buffer */
+		pSrcAV = vld1q_f64(pSrcA);
+		pSrcBV = vld1q_f64(pSrcB);
+		/* Calculate dot product and store result in a temporary buffer. */
+		sumV = vmlaq_f64(sumV, pSrcAV, pSrcBV);
+		
+		pSrcA+=2;
+		pSrcB+=2;
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	/* Sum both 64 bits part in the float64x2 */
+	sum = vaddvq_f64(sumV);
+	
+	
+	/* Tail */
+	blkCnt = blockSize & 1 ;
+	
+	while(blkCnt > 0U)
+	{
+		sum += (*pSrcA++) * (*pSrcB++);
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	/* Store result in destination buffer */
+	*result = sum;
+}
 #else
 void arm_dot_prod_f64(
-  const float64_t * pSrcA,
-  const float64_t * pSrcB,
-        uint32_t blockSize,
-        float64_t * result)
+	const float64_t * pSrcA,
+	const float64_t * pSrcB,
+	uint32_t blockSize,
+	float64_t * result)
 {
-  uint32_t blkCnt;                               /* Loop counter */
-  float64_t sum = 0.;                            /* Temporary return variable */
-
-  /* Initialize blkCnt with number of samples */
-  blkCnt = blockSize;
-
-  while (blkCnt > 0U)
-  {
-    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
-
-    /* Calculate dot product and store result in a temporary buffer. */
-    sum += (*pSrcA++) * (*pSrcB++);
-
-    /* Decrement loop counter */
-    blkCnt--;
-  }
-
-  /* Store result in destination buffer */
-  *result = sum;
+	uint32_t blkCnt;                               /* Loop counter */
+	float64_t sum = 0.;                            /* Temporary return variable */
+	
+	/* Initialize blkCnt with number of samples */
+	blkCnt = blockSize;
+	
+	while (blkCnt > 0U)
+	{
+		/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+		
+		/* Calculate dot product and store result in a temporary buffer. */
+		sum += (*pSrcA++) * (*pSrcB++);
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	/* Store result in destination buffer */
+	*result = sum;
 }
 #endif

--- a/Source/DistanceFunctions/arm_chebyshev_distance_f64.c
+++ b/Source/DistanceFunctions/arm_chebyshev_distance_f64.c
@ -48,49 +48,49 @@
 */
 float64_t arm_chebyshev_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
 {
-
-   float64_t diff=0.,  maxVal,tmpA, tmpB;
-  uint32_t blkCnt;
-  maxVal = F64_MIN;
+	
+	float64_t diff=0.,  maxVal,tmpA, tmpB;
+	uint32_t blkCnt;
+	maxVal = F64_MIN;
 #if defined(ARM_MATH_NEON)
-  float64x2_t diffV , tmpAV , tmpBV , maxValV ;
-  maxValV = vdupq_n_f64(maxVal);
-  blkCnt = blockSize >> 1U ;
-  while(blkCnt > 0U)
-  {
-    tmpAV = vld1q_f64(pA);
-    tmpBV = vld1q_f64(pB);
-    diffV = vabsq_f64((vsubq_f64(tmpAV, tmpBV)));
-    maxValV = vmaxq_f64(maxValV, diffV);
-    pA+=2;
-    pB+=2;
-    blkCnt--;
-  }
-  maxVal =vgetq_lane_f64(maxValV, 0);
-  if(maxVal < vgetq_lane_f64(maxValV, 1))
-  {
-      maxVal = vgetq_lane_f64(maxValV, 1);
-  }
-  blkCnt = blockSize & 1;
-  
-  
+	float64x2_t diffV , tmpAV , tmpBV , maxValV ;
+	maxValV = vdupq_n_f64(maxVal);
+	blkCnt = blockSize >> 1U ;
+	while(blkCnt > 0U)
+	{
+		tmpAV = vld1q_f64(pA);
+		tmpBV = vld1q_f64(pB);
+		diffV = vabsq_f64((vsubq_f64(tmpAV, tmpBV)));
+		maxValV = vmaxq_f64(maxValV, diffV);
+		pA+=2;
+		pB+=2;
+		blkCnt--;
+	}
+	maxVal =vgetq_lane_f64(maxValV, 0);
+	if(maxVal < vgetq_lane_f64(maxValV, 1))
+	{
+		maxVal = vgetq_lane_f64(maxValV, 1);
+	}
+	blkCnt = blockSize & 1;
+	
+	
 #else
-  blkCnt = blockSize;
+	blkCnt = blockSize;
 #endif
-
-   while(blkCnt > 0)
-   {
-      tmpA = *pA++;
-      tmpB = *pB++;
-      diff = fabs(tmpA - tmpB);
-      if (diff > maxVal)
-      {
-        maxVal = diff;
-      }
-      blkCnt --;
-   }
-  
-   return(maxVal);
+	
+	while(blkCnt > 0)
+	{
+		tmpA = *pA++;
+		tmpB = *pB++;
+		diff = fabs(tmpA - tmpB);
+		if (diff > maxVal)
+		{
+			maxVal = diff;
+		}
+		blkCnt --;
+	}
+	
+	return(maxVal);
 }

 /**
--- a/Source/DistanceFunctions/arm_cityblock_distance_f64.c
+++ b/Source/DistanceFunctions/arm_cityblock_distance_f64.c
@ -47,41 +47,41 @@
 */
 float64_t arm_cityblock_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
 {
-   float64_t accum,tmpA, tmpB;
-  uint32_t blkCnt;
-   accum = 0.;
-  
+	float64_t accum,tmpA, tmpB;
+	uint32_t blkCnt;
+	accum = 0.;
+	
 #if defined(ARM_MATH_NEON)
-  float64x2_t tmpAV, tmpBV,accumV , subV;
-  accumV = vdupq_n_f64(0.0f);
-  blkCnt = blockSize >> 1U;
-  
-  while(blkCnt > 0U)
-  {
-    tmpAV = vld1q_f64(pA);
-    tmpBV = vld1q_f64(pB);
-    subV = vabdq_f64(tmpAV, tmpBV);
-    accumV = vaddq_f64(accumV, subV);
-    pA+=2;
-    pB+=2;
-    blkCnt--;
-  }
-  accum = vaddvq_f64(accumV);
-  blkCnt = blockSize & 1 ; 
-  
+	float64x2_t tmpAV, tmpBV,accumV , subV;
+	accumV = vdupq_n_f64(0.0f);
+	blkCnt = blockSize >> 1U;
+	
+	while(blkCnt > 0U)
+	{
+		tmpAV = vld1q_f64(pA);
+		tmpBV = vld1q_f64(pB);
+		subV = vabdq_f64(tmpAV, tmpBV);
+		accumV = vaddq_f64(accumV, subV);
+		pA+=2;
+		pB+=2;
+		blkCnt--;
+	}
+	accum = vaddvq_f64(accumV);
+	blkCnt = blockSize & 1 ; 
+	
 #else
-  blkCnt = blockSize;
+	blkCnt = blockSize;
 #endif
-   while(blkCnt > 0)
-   {
-      tmpA = *pA++;
-      tmpB = *pB++;
-      accum  += fabs(tmpA - tmpB);
-      
-      blkCnt--;
-   }
-  
-   return(accum);
+	while(blkCnt > 0)
+	{
+		tmpA = *pA++;
+		tmpB = *pB++;
+		accum  += fabs(tmpA - tmpB);
+		
+		blkCnt--;
+	}
+	
+	return(accum);
 }

 /**
--- a/Source/DistanceFunctions/arm_euclidean_distance_f64.c
+++ b/Source/DistanceFunctions/arm_euclidean_distance_f64.c
@ -49,35 +49,35 @@
 */
 float64_t arm_euclidean_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
 {
-   float64_t accum=0.,tmp;
-  uint32_t blkCnt;
+	float64_t accum=0.,tmp;
+	uint32_t blkCnt;
 #if defined(ARM_MATH_NEON)
-  float64x2_t accumV,tmpV , pAV ,pBV;
-  accumV = vdupq_n_f64(0.0f);
-  blkCnt = blockSize >> 1U;
-  while(blkCnt > 0U)
-  {
-    pAV = vld1q_f64(pA);
-    pBV = vld1q_f64(pB);
-    tmpV = vsubq_f64(pAV, pBV);
-    accumV = vmlaq_f64(accumV, tmpV, tmpV);
-    pA+=2;
-    pB+=2;
-    blkCnt--;
-  }
-  accum = vaddvq_f64(accumV);
-  blkCnt = blockSize & 1;
+	float64x2_t accumV,tmpV , pAV ,pBV;
+	accumV = vdupq_n_f64(0.0f);
+	blkCnt = blockSize >> 1U;
+	while(blkCnt > 0U)
+	{
+		pAV = vld1q_f64(pA);
+		pBV = vld1q_f64(pB);
+		tmpV = vsubq_f64(pAV, pBV);
+		accumV = vmlaq_f64(accumV, tmpV, tmpV);
+		pA+=2;
+		pB+=2;
+		blkCnt--;
+	}
+	accum = vaddvq_f64(accumV);
+	blkCnt = blockSize & 1;
 #else
-  blkCnt = blockSize;
+	blkCnt = blockSize;
 #endif
-   while(blkCnt > 0)
-   {
-      tmp = *pA++ - *pB++;
-      accum += SQ(tmp);
-      blkCnt --;
-   }
-   tmp = sqrt(accum);
-   return(tmp);
+	while(blkCnt > 0)
+	{
+		tmp = *pA++ - *pB++;
+		accum += SQ(tmp);
+		blkCnt --;
+	}
+	tmp = sqrt(accum);
+	return(tmp);
 }

 /**
--- a/Source/FastMathFunctions/arm_vexp_f64.c
+++ b/Source/FastMathFunctions/arm_vexp_f64.c
@ -33,44 +33,44 @@
 #endif

 void arm_vexp_f64(
-  const float64_t * pSrc,
-        float64_t * pDst,
-        uint32_t blockSize)
+	const float64_t * pSrc,
+	float64_t * pDst,
+	uint32_t blockSize)
 {
-   uint32_t blkCnt;
+	uint32_t blkCnt;
 #if defined(ARM_MATH_NEON)
- 
-  
-  float64x2_t src;
-  float64x2_t dst;
-
-  blkCnt = blockSize >> 1U;
-
-  while (blkCnt > 0U)
-  {
-     src = vld1q_f64(pSrc);
-     dst = vexpq_f64(src);
-     vst1q_f64(pDst, dst);
-
-     pSrc += 2;
-     pDst += 2;
-
-     blkCnt--;
-  }
-  
-  blkCnt = blockSize & 1;
+	
+	
+	float64x2_t src;
+	float64x2_t dst;
+	
+	blkCnt = blockSize >> 1U;
+	
+	while (blkCnt > 0U)
+	{
+		src = vld1q_f64(pSrc);
+		dst = vexpq_f64(src);
+		vst1q_f64(pDst, dst);
+		
+		pSrc += 2;
+		pDst += 2;
+		
+		blkCnt--;
+	}
+	
+	blkCnt = blockSize & 1;
 #else
-   blkCnt = blockSize;
+	blkCnt = blockSize;
 #endif
-   while (blkCnt > 0U)
-   {
-      /* C = log(A) */
-     
-  
-      /* Calculate log and store result in destination buffer. */
-      *pDst++ = exp(*pSrc++);
-  
-      /* Decrement loop counter */
-      blkCnt--;
-   }
+	while (blkCnt > 0U)
+	{
+		/* C = log(A) */
+		
+		
+		/* Calculate log and store result in destination buffer. */
+		*pDst++ = exp(*pSrc++);
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
 }
--- a/Source/FastMathFunctions/arm_vlog_f64.c
+++ b/Source/FastMathFunctions/arm_vlog_f64.c
@ -34,43 +34,43 @@
 #endif

 void arm_vlog_f64(
-  const float64_t * pSrc,
-        float64_t * pDst,
-        uint32_t blockSize)
+	const float64_t * pSrc,
+	float64_t * pDst,
+	uint32_t blockSize)
 {
-   uint32_t blkCnt; 
+	uint32_t blkCnt;
 #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
-   float64x2_t src;
-   float64x2_t dst;
-
-   blkCnt = blockSize >> 1U;
-
-   while (blkCnt > 0U)
-   {
-      src = vld1q_f64(pSrc);
-      dst = vlogq_f64(src);
-      vst1q_f64(pDst, dst);
-
-      pSrc += 2;
-      pDst += 2;
-      /* Decrement loop counter */
-      blkCnt--;
-   }
-
-   blkCnt = blockSize & 1;
+	float64x2_t src;
+	float64x2_t dst;
+	
+	blkCnt = blockSize >> 1U;
+	
+	while (blkCnt > 0U)
+	{
+		src = vld1q_f64(pSrc);
+		dst = vlogq_f64(src);
+		vst1q_f64(pDst, dst);
+		
+		pSrc += 2;
+		pDst += 2;
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	blkCnt = blockSize & 1;
 #else
-   blkCnt = blockSize;
+	blkCnt = blockSize;
 #endif
-
-
-   while (blkCnt > 0U)
-   {
-      /* C = log(A) */
-  
-      /* Calculate log and store result in destination buffer. */
-      *pDst++ = log(*pSrc++);
-  
-      /* Decrement loop counter */
-      blkCnt--;
-   }
+	
+	
+	while (blkCnt > 0U)
+	{
+		/* C = log(A) */
+		
+		/* Calculate log and store result in destination buffer. */
+		*pDst++ = log(*pSrc++);
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
 }
--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f64.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f64.c
--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f64.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f64.c
@ -63,22 +63,22 @@
 */

 void arm_biquad_cascade_df2T_init_f64(
-        arm_biquad_cascade_df2T_instance_f64 * S,
-        uint8_t numStages,
-        const float64_t * pCoeffs,
-        float64_t * pState)
+	arm_biquad_cascade_df2T_instance_f64 * S,
+	uint8_t numStages,
+	const float64_t * pCoeffs,
+	float64_t * pState)
 {
-  /* Assign filter stages */
-  S->numStages = numStages;
-
-  /* Assign coefficient pointer */
-  S->pCoeffs = pCoeffs;
-
-  /* Clear state buffer and size is always 2 * numStages */
-  memset(pState, 0, (2U * (uint32_t) numStages) * sizeof(float64_t));
-
-  /* Assign state pointer */
-  S->pState = pState;
+	/* Assign filter stages */
+	S->numStages = numStages;
+	
+	/* Assign coefficient pointer */
+	S->pCoeffs = pCoeffs;
+	
+	/* Clear state buffer and size is always 2 * numStages */
+	memset(pState, 0, (2U * (uint32_t) numStages) * sizeof(float64_t));
+	
+	/* Assign state pointer */
+	S->pState = pState;
 }

 /**
--- a/Source/FilteringFunctions/arm_correlate_f64.c
+++ b/Source/FilteringFunctions/arm_correlate_f64.c
@ -48,384 +48,384 @@
 */

 void arm_correlate_f64(
-  const float64_t * pSrcA,
-        uint32_t srcALen,
-  const float64_t * pSrcB,
-        uint32_t srcBLen,
-        float64_t * pDst)
+	const float64_t * pSrcA,
+	uint32_t srcALen,
+	const float64_t * pSrcB,
+	uint32_t srcBLen,
+	float64_t * pDst)
 {
-  const float64_t *pIn1;                               /* InputA pointer */
-  const float64_t *pIn2;                               /* InputB pointer */
-        float64_t *pOut = pDst;                        /* Output pointer */
-  const float64_t *px;                                 /* Intermediate inputA pointer */
-  const float64_t *py;                                 /* Intermediate inputB pointer */
-  const float64_t *pSrc1;
-        float64_t sum;
-        uint32_t blockSize1, blockSize2, blockSize3;   /* Loop counters */
-        uint32_t j, k, count, blkCnt;                  /* Loop counters */
-        uint32_t outBlockSize;                         /* Loop counter */
-        int32_t inc = 1;                               /* Destination address modifier */
+	const float64_t *pIn1;                               /* InputA pointer */
+	const float64_t *pIn2;                               /* InputB pointer */
+	float64_t *pOut = pDst;                        /* Output pointer */
+	const float64_t *px;                                 /* Intermediate inputA pointer */
+	const float64_t *py;                                 /* Intermediate inputB pointer */
+	const float64_t *pSrc1;
+	float64_t sum;
+	uint32_t blockSize1, blockSize2, blockSize3;   /* Loop counters */
+	uint32_t j, k, count, blkCnt;                  /* Loop counters */
+	uint32_t outBlockSize;                         /* Loop counter */
+	int32_t inc = 1;                               /* Destination address modifier */
 #if defined(ARM_MATH_NEON)
-  float64x2_t sumV,pxV,pyV ;
+	float64x2_t sumV,pxV,pyV ;
 #endif
-
-  /* The algorithm implementation is based on the lengths of the inputs. */
-  /* srcB is always made to slide across srcA. */
-  /* So srcBLen is always considered as shorter or equal to srcALen */
-  /* But CORR(x, y) is reverse of CORR(y, x) */
-  /* So, when srcBLen > srcALen, output pointer is made to point to the end of the output buffer */
-  /* and the destination pointer modifier, inc is set to -1 */
-  /* If srcALen > srcBLen, zero pad has to be done to srcB to make the two inputs of same length */
-  /* But to improve the performance,
-   * we assume zeroes in the output instead of zero padding either of the the inputs*/
-  /* If srcALen > srcBLen,
-   * (srcALen - srcBLen) zeroes has to included in the starting of the output buffer */
-  /* If srcALen < srcBLen,
-   * (srcALen - srcBLen) zeroes has to included in the ending of the output buffer */
-  if (srcALen >= srcBLen)
-  {
-    /* Initialization of inputA pointer */
-    pIn1 = pSrcA;
-
-    /* Initialization of inputB pointer */
-    pIn2 = pSrcB;
-
-    /* Number of output samples is calculated */
-    outBlockSize = (2U * srcALen) - 1U;
-
-    /* When srcALen > srcBLen, zero padding has to be done to srcB
-     * to make their lengths equal.
-     * Instead, (outBlockSize - (srcALen + srcBLen - 1))
-     * number of output samples are made zero */
-    j = outBlockSize - (srcALen + (srcBLen - 1U));
-
-    /* Updating the pointer position to non zero value */
-    pOut += j;
-  }
-  else
-  {
-    /* Initialization of inputA pointer */
-    pIn1 = pSrcB;
-
-    /* Initialization of inputB pointer */
-    pIn2 = pSrcA;
-
-    /* srcBLen is always considered as shorter or equal to srcALen */
-    j = srcBLen;
-    srcBLen = srcALen;
-    srcALen = j;
-
-    /* CORR(x, y) = Reverse order(CORR(y, x)) */
-    /* Hence set the destination pointer to point to the last output sample */
-    pOut = pDst + ((srcALen + srcBLen) - 2U);
-
-    /* Destination address modifier is set to -1 */
-    inc = -1;
-  }
-
-  /* The function is internally
-   * divided into three stages according to the number of multiplications that has to be
-   * taken place between inputA samples and inputB samples. In the first stage of the
-   * algorithm, the multiplications increase by one for every iteration.
-   * In the second stage of the algorithm, srcBLen number of multiplications are done.
-   * In the third stage of the algorithm, the multiplications decrease by one
-   * for every iteration. */
-
-  /* The algorithm is implemented in three stages.
-     The loop counters of each stage is initiated here. */
-  blockSize1 = srcBLen - 1U;
-  blockSize2 = srcALen - (srcBLen - 1U);
-  blockSize3 = blockSize1;
-
-  /* --------------------------
-   * Initializations of stage1
-   * -------------------------*/
-
-  /* sum = x[0] * y[srcBlen - 1]
-   * sum = x[0] * y[srcBlen-2] + x[1] * y[srcBlen - 1]
-   * ....
-   * sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen - 1] * y[srcBLen - 1]
-   */
-
-  /* In this stage the MAC operations are increased by 1 for every iteration.
-     The count variable holds the number of MAC operations performed */
-  count = 1U;
-
-  /* Working pointer of inputA */
-  px = pIn1;
-
-  /* Working pointer of inputB */
-  pSrc1 = pIn2 + (srcBLen - 1U);
-  py = pSrc1;
-
-  /* ------------------------
-   * Stage1 process
-   * ----------------------*/
-
-  /* The first stage starts here */
-  while (blockSize1 > 0U)
-  {
-    /* Accumulator is made zero for every iteration */
-    sum = 0.;
+	
+	/* The algorithm implementation is based on the lengths of the inputs. */
+	/* srcB is always made to slide across srcA. */
+	/* So srcBLen is always considered as shorter or equal to srcALen */
+	/* But CORR(x, y) is reverse of CORR(y, x) */
+	/* So, when srcBLen > srcALen, output pointer is made to point to the end of the output buffer */
+	/* and the destination pointer modifier, inc is set to -1 */
+	/* If srcALen > srcBLen, zero pad has to be done to srcB to make the two inputs of same length */
+	/* But to improve the performance,
+	 * we assume zeroes in the output instead of zero padding either of the the inputs*/
+	/* If srcALen > srcBLen,
+	 * (srcALen - srcBLen) zeroes has to included in the starting of the output buffer */
+	/* If srcALen < srcBLen,
+	 * (srcALen - srcBLen) zeroes has to included in the ending of the output buffer */
+	if (srcALen >= srcBLen)
+	{
+		/* Initialization of inputA pointer */
+		pIn1 = pSrcA;
+		
+		/* Initialization of inputB pointer */
+		pIn2 = pSrcB;
+		
+		/* Number of output samples is calculated */
+		outBlockSize = (2U * srcALen) - 1U;
+		
+		/* When srcALen > srcBLen, zero padding has to be done to srcB
+		 * to make their lengths equal.
+		 * Instead, (outBlockSize - (srcALen + srcBLen - 1))
+		 * number of output samples are made zero */
+		j = outBlockSize - (srcALen + (srcBLen - 1U));
+		
+		/* Updating the pointer position to non zero value */
+		pOut += j;
+	}
+	else
+	{
+		/* Initialization of inputA pointer */
+		pIn1 = pSrcB;
+		
+		/* Initialization of inputB pointer */
+		pIn2 = pSrcA;
+		
+		/* srcBLen is always considered as shorter or equal to srcALen */
+		j = srcBLen;
+		srcBLen = srcALen;
+		srcALen = j;
+		
+		/* CORR(x, y) = Reverse order(CORR(y, x)) */
+		/* Hence set the destination pointer to point to the last output sample */
+		pOut = pDst + ((srcALen + srcBLen) - 2U);
+		
+		/* Destination address modifier is set to -1 */
+		inc = -1;
+	}
+	
+	/* The function is internally
+	 * divided into three stages according to the number of multiplications that has to be
+	 * taken place between inputA samples and inputB samples. In the first stage of the
+	 * algorithm, the multiplications increase by one for every iteration.
+	 * In the second stage of the algorithm, srcBLen number of multiplications are done.
+	 * In the third stage of the algorithm, the multiplications decrease by one
+	 * for every iteration. */
+	
+	/* The algorithm is implemented in three stages.
+	 The loop counters of each stage is initiated here. */
+	blockSize1 = srcBLen - 1U;
+	blockSize2 = srcALen - (srcBLen - 1U);
+	blockSize3 = blockSize1;
+	
+	/* --------------------------
+	 * Initializations of stage1
+	 * -------------------------*/
+	
+	/* sum = x[0] * y[srcBlen - 1]
+	 * sum = x[0] * y[srcBlen-2] + x[1] * y[srcBlen - 1]
+	 * ....
+	 * sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen - 1] * y[srcBLen - 1]
+	 */
+	
+	/* In this stage the MAC operations are increased by 1 for every iteration.
+	 The count variable holds the number of MAC operations performed */
+	count = 1U;
+	
+	/* Working pointer of inputA */
+	px = pIn1;
+	
+	/* Working pointer of inputB */
+	pSrc1 = pIn2 + (srcBLen - 1U);
+	py = pSrc1;
+	
+	/* ------------------------
+	 * Stage1 process
+	 * ----------------------*/
+	
+	/* The first stage starts here */
+	while (blockSize1 > 0U)
+	{
+		/* Accumulator is made zero for every iteration */
+		sum = 0.;
 #if defined(ARM_MATH_NEON)
-    sumV = vdupq_n_f64(0.0f);
-    k = count >> 1U ;
-    
-    while(k > 0U)
-    {
-      pxV = vld1q_f64(px);
-      pyV = vld1q_f64(py);
-      sumV = vmlaq_f64(sumV, pxV, pyV);
-      px+=2;
-      py+=2;
-      k--;
-    }
-    sum =vaddvq_f64(sumV);
-    k = count & 1 ;
+		sumV = vdupq_n_f64(0.0f);
+		k = count >> 1U ;
+		
+		while(k > 0U)
+		{
+			pxV = vld1q_f64(px);
+			pyV = vld1q_f64(py);
+			sumV = vmlaq_f64(sumV, pxV, pyV);
+			px+=2;
+			py+=2;
+			k--;
+		}
+		sum =vaddvq_f64(sumV);
+		k = count & 1 ;
 #else
-    /* Initialize k with number of samples */
-    k = count;
+		/* Initialize k with number of samples */
+		k = count;
 #endif
-    while (k > 0U)
-    {
-      /* Perform the multiply-accumulate */
-      /* x[0] * y[srcBLen - 1] */
-      sum += *px++ * *py++;
-
-      /* Decrement loop counter */
-      k--;
-    }
-
-    /* Store the result in the accumulator in the destination buffer. */
-
-    *pOut = sum;
-    /* Destination pointer is updated according to the address modifier, inc */
-    pOut += inc;
-
-    /* Update the inputA and inputB pointers for next MAC calculation */
-    py = pSrc1 - count;
-    px = pIn1;
-
-    /* Increment MAC count */
-    count++;
-
-    /* Decrement loop counter */
-    blockSize1--;
-
-  }
-
-  /* --------------------------
-   * Initializations of stage2
-   * ------------------------*/
-
-  /* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1]
-   * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen]   * y[srcBLen-1]
-   * ....
-   * sum = x[srcALen-srcBLen-2] * y[0] + x[srcALen-srcBLen-1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
-   */
-
-  /* Working pointer of inputA */
-  px = pIn1;
-
-  /* Working pointer of inputB */
-  py = pIn2;
-
-  /* count is index by which the pointer pIn1 to be incremented */
-  count = 0U;
-
-  /* -------------------
-   * Stage2 process
-   * ------------------*/
-
-  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
-   * So, to loop unroll over blockSize2,
-   * srcBLen should be greater than or equal to 4 */
-  if (srcBLen >= 4U)
-  {
-    /* Initialize blkCnt with number of samples */
-    blkCnt = blockSize2;
-
-    while (blkCnt > 0U)
-    {
-      /* Accumulator is made zero for every iteration */
-      sum = 0.;
+		while (k > 0U)
+		{
+			/* Perform the multiply-accumulate */
+			/* x[0] * y[srcBLen - 1] */
+			sum += *px++ * *py++;
+			
+			/* Decrement loop counter */
+			k--;
+		}
+		
+		/* Store the result in the accumulator in the destination buffer. */
+		
+		*pOut = sum;
+		/* Destination pointer is updated according to the address modifier, inc */
+		pOut += inc;
+		
+		/* Update the inputA and inputB pointers for next MAC calculation */
+		py = pSrc1 - count;
+		px = pIn1;
+		
+		/* Increment MAC count */
+		count++;
+		
+		/* Decrement loop counter */
+		blockSize1--;
+		
+	}
+	
+	/* --------------------------
+	 * Initializations of stage2
+	 * ------------------------*/
+	
+	/* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1]
+	 * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen]   * y[srcBLen-1]
+	 * ....
+	 * sum = x[srcALen-srcBLen-2] * y[0] + x[srcALen-srcBLen-1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
+	 */
+	
+	/* Working pointer of inputA */
+	px = pIn1;
+	
+	/* Working pointer of inputB */
+	py = pIn2;
+	
+	/* count is index by which the pointer pIn1 to be incremented */
+	count = 0U;
+	
+	/* -------------------
+	 * Stage2 process
+	 * ------------------*/
+	
+	/* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
+	 * So, to loop unroll over blockSize2,
+	 * srcBLen should be greater than or equal to 4 */
+	if (srcBLen >= 4U)
+	{
+		/* Initialize blkCnt with number of samples */
+		blkCnt = blockSize2;
+		
+		while (blkCnt > 0U)
+		{
+			/* Accumulator is made zero for every iteration */
+			sum = 0.;
 #if defined(ARM_MATH_NEON)
-      sumV = vdupq_n_f64(0.0f);
-      k = srcBLen >> 1U ;
-      while(k > 0U)
-      {
-        pxV = vld1q_f64(px);
-        pyV = vld1q_f64(py);
-        sumV = vmlaq_f64(sumV, pxV, pyV);
-        px+=2;
-        py+=2;
-        k--;
-      }
-      sum =vaddvq_f64(sumV);
-      k = srcBLen & 1 ;
-      
+			sumV = vdupq_n_f64(0.0f);
+			k = srcBLen >> 1U ;
+			while(k > 0U)
+			{
+				pxV = vld1q_f64(px);
+				pyV = vld1q_f64(py);
+				sumV = vmlaq_f64(sumV, pxV, pyV);
+				px+=2;
+				py+=2;
+				k--;
+			}
+			sum =vaddvq_f64(sumV);
+			k = srcBLen & 1 ;
+			
 #else
-
-      /* Initialize blkCnt with number of samples */
-      k = srcBLen;
+			
+			/* Initialize blkCnt with number of samples */
+			k = srcBLen;
 #endif
-      while (k > 0U)
-      {
-        /* Perform the multiply-accumulate */
-        sum += *px++ * *py++;
-
-        /* Decrement the loop counter */
-        k--;
-      }
-
-      /* Store the result in the accumulator in the destination buffer. */
-      *pOut = sum;
-
-      /* Destination pointer is updated according to the address modifier, inc */
-      pOut += inc;
-
-      /* Increment the pointer pIn1 index, count by 1 */
-      count++;
-
-      /* Update the inputA and inputB pointers for next MAC calculation */
-      px = pIn1 + count;
-      py = pIn2;
-
-      /* Decrement the loop counter */
-      blkCnt--;
-    }
-  }
-  else
-  {
-    /* If the srcBLen is not a multiple of 4,
-     * the blockSize2 loop cannot be unrolled by 4 */
-    blkCnt = blockSize2;
-
-    while (blkCnt > 0U)
-    {
-      /* Accumulator is made zero for every iteration */
-      sum = 0.;
+			while (k > 0U)
+			{
+				/* Perform the multiply-accumulate */
+				sum += *px++ * *py++;
+				
+				/* Decrement the loop counter */
+				k--;
+			}
+			
+			/* Store the result in the accumulator in the destination buffer. */
+			*pOut = sum;
+			
+			/* Destination pointer is updated according to the address modifier, inc */
+			pOut += inc;
+			
+			/* Increment the pointer pIn1 index, count by 1 */
+			count++;
+			
+			/* Update the inputA and inputB pointers for next MAC calculation */
+			px = pIn1 + count;
+			py = pIn2;
+			
+			/* Decrement the loop counter */
+			blkCnt--;
+		}
+	}
+	else
+	{
+		/* If the srcBLen is not a multiple of 4,
+		 * the blockSize2 loop cannot be unrolled by 4 */
+		blkCnt = blockSize2;
+		
+		while (blkCnt > 0U)
+		{
+			/* Accumulator is made zero for every iteration */
+			sum = 0.;
 #if defined(ARM_MATH_NEON)
-      sumV = vdupq_n_f64(0.0f);
-      k = srcBLen >> 1U ;
-      while(k > 0U)
-      {
-        pxV = vld1q_f64(px);
-        pyV = vld1q_f64(py);
-        sumV = vmlaq_f64(sumV, pxV, pyV);
-        px+=2;
-        py+=2;
-        k--;
-      }
-      sum =vaddvq_f64(sumV);
-      k = srcBLen & 1 ;
-      
+			sumV = vdupq_n_f64(0.0f);
+			k = srcBLen >> 1U ;
+			while(k > 0U)
+			{
+				pxV = vld1q_f64(px);
+				pyV = vld1q_f64(py);
+				sumV = vmlaq_f64(sumV, pxV, pyV);
+				px+=2;
+				py+=2;
+				k--;
+			}
+			sum =vaddvq_f64(sumV);
+			k = srcBLen & 1 ;
+			
 #else
-
-      /* Loop over srcBLen */
-      k = srcBLen;
+			
+			/* Loop over srcBLen */
+			k = srcBLen;
 #endif
-      while (k > 0U)
-      {
-        /* Perform the multiply-accumulate */
-        sum += *px++ * *py++;
-
-        /* Decrement the loop counter */
-        k--;
-      }
-
-      /* Store the result in the accumulator in the destination buffer. */
-      *pOut = sum;
-      /* Destination pointer is updated according to the address modifier, inc */
-      pOut += inc;
-
-      /* Increment the pointer pIn1 index, count by 1 */
-      count++;
-
-      /* Update the inputA and inputB pointers for next MAC calculation */
-      px = pIn1 + count;
-      py = pIn2;
-
-      /* Decrement the loop counter */
-      blkCnt--;
-    }
-  }
-
-
-  /* --------------------------
-   * Initializations of stage3
-   * -------------------------*/
-
-  /* sum += x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
-   * sum += x[srcALen-srcBLen+2] * y[0] + x[srcALen-srcBLen+3] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
-   * ....
-   * sum +=  x[srcALen-2] * y[0] + x[srcALen-1] * y[1]
-   * sum +=  x[srcALen-1] * y[0]
-   */
-
-  /* In this stage the MAC operations are decreased by 1 for every iteration.
-     The count variable holds the number of MAC operations performed */
-  count = srcBLen - 1U;
-
-  /* Working pointer of inputA */
-  pSrc1 = pIn1 + (srcALen - (srcBLen - 1U));
-  px = pSrc1;
-
-  /* Working pointer of inputB */
-  py = pIn2;
-
-  /* -------------------
-   * Stage3 process
-   * ------------------*/
-
-  while (blockSize3 > 0U)
-  {
-    /* Accumulator is made zero for every iteration */
-    sum = 0.;
+			while (k > 0U)
+			{
+				/* Perform the multiply-accumulate */
+				sum += *px++ * *py++;
+				
+				/* Decrement the loop counter */
+				k--;
+			}
+			
+			/* Store the result in the accumulator in the destination buffer. */
+			*pOut = sum;
+			/* Destination pointer is updated according to the address modifier, inc */
+			pOut += inc;
+			
+			/* Increment the pointer pIn1 index, count by 1 */
+			count++;
+			
+			/* Update the inputA and inputB pointers for next MAC calculation */
+			px = pIn1 + count;
+			py = pIn2;
+			
+			/* Decrement the loop counter */
+			blkCnt--;
+		}
+	}
+	
+	
+	/* --------------------------
+	 * Initializations of stage3
+	 * -------------------------*/
+	
+	/* sum += x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
+	 * sum += x[srcALen-srcBLen+2] * y[0] + x[srcALen-srcBLen+3] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
+	 * ....
+	 * sum +=  x[srcALen-2] * y[0] + x[srcALen-1] * y[1]
+	 * sum +=  x[srcALen-1] * y[0]
+	 */
+	
+	/* In this stage the MAC operations are decreased by 1 for every iteration.
+	 The count variable holds the number of MAC operations performed */
+	count = srcBLen - 1U;
+	
+	/* Working pointer of inputA */
+	pSrc1 = pIn1 + (srcALen - (srcBLen - 1U));
+	px = pSrc1;
+	
+	/* Working pointer of inputB */
+	py = pIn2;
+	
+	/* -------------------
+	 * Stage3 process
+	 * ------------------*/
+	
+	while (blockSize3 > 0U)
+	{
+		/* Accumulator is made zero for every iteration */
+		sum = 0.;
 #if defined(ARM_MATH_NEON)
-    sumV = vdupq_n_f64(0.0f);
-    k = count >> 1U ;
-    
-    while(k > 0U)
-    {
-      pxV = vld1q_f64(px);
-      pyV = vld1q_f64(py);
-      sumV = vmlaq_f64(sumV, pxV, pyV);
-      px+=2;
-      py+=2;
-      k--;
-    }
-    sum =vaddvq_f64(sumV);
-    k = count & 1 ;
+		sumV = vdupq_n_f64(0.0f);
+		k = count >> 1U ;
+		
+		while(k > 0U)
+		{
+			pxV = vld1q_f64(px);
+			pyV = vld1q_f64(py);
+			sumV = vmlaq_f64(sumV, pxV, pyV);
+			px+=2;
+			py+=2;
+			k--;
+		}
+		sum =vaddvq_f64(sumV);
+		k = count & 1 ;
 #else
-
-    /* Initialize blkCnt with number of samples */
-    k = count;
+		
+		/* Initialize blkCnt with number of samples */
+		k = count;
 #endif
-    while (k > 0U)
-    {
-      /* Perform the multiply-accumulate */
-      sum += *px++ * *py++;
-
-      /* Decrement loop counter */
-      k--;
-    }
-
-    /* Store the result in the accumulator in the destination buffer. */
-    *pOut = sum;
-    /* Destination pointer is updated according to the address modifier, inc */
-    pOut += inc;
-
-    /* Update the inputA and inputB pointers for next MAC calculation */
-    px = ++pSrc1;
-    py = pIn2;
-
-    /* Decrement MAC count */
-    count--;
-
-    /* Decrement the loop counter */
-    blockSize3--;
-  }
+		while (k > 0U)
+		{
+			/* Perform the multiply-accumulate */
+			sum += *px++ * *py++;
+			
+			/* Decrement loop counter */
+			k--;
+		}
+		
+		/* Store the result in the accumulator in the destination buffer. */
+		*pOut = sum;
+		/* Destination pointer is updated according to the address modifier, inc */
+		pOut += inc;
+		
+		/* Update the inputA and inputB pointers for next MAC calculation */
+		px = ++pSrc1;
+		py = pIn2;
+		
+		/* Decrement MAC count */
+		count--;
+		
+		/* Decrement the loop counter */
+		blockSize3--;
+	}
 }

 /**
--- a/Source/FilteringFunctions/arm_fir_f64.c
+++ b/Source/FilteringFunctions/arm_fir_f64.c
@ -47,177 +47,177 @@
 */
 #if defined(ARM_MATH_NEON)
 void arm_fir_f64(
-  const arm_fir_instance_f64 * S,
-  const float64_t * pSrc,
-        float64_t * pDst,
-        uint32_t blockSize)
+	const arm_fir_instance_f64 * S,
+	const float64_t * pSrc,
+	float64_t * pDst,
+	uint32_t blockSize)
 {
-        float64_t *pState = S->pState;                 /* State pointer */
-  const float64_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
-        float64_t *pStateCurnt;                        /* Points to the current sample of the state */
-        float64_t *px;                                 /* Temporary pointer for state buffer */
-  const float64_t *pb;                                 /* Temporary pointer for coefficient buffer */
-    float64x2_t pxV;
-    float64x2_t pbV;
-    float64x2_t acc0V;
-        float64_t acc0;                                /* Accumulator */
-        uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
-        uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
-
-  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
-  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = &(S->pState[(numTaps - 1U)]);
-
-  /* Initialize blkCnt with number of taps */
-  blkCnt = blockSize;
-
-  while (blkCnt > 0U)
-  {
-    /* Copy one sample at a time into state buffer */
-    *pStateCurnt++ = *pSrc++;
-
-    /* Set the accumulator to zero */
-    acc0 = 0.;
-    acc0V = vdupq_n_f64(0.0f);
-
-    /* Initialize state pointer */
-    px = pState;
-
-    /* Initialize Coefficient pointer */
-    pb = pCoeffs;
-
-    i = numTaps >> 1U;
-
-    /* Perform the multiply-accumulates */
-    while (i > 0U)
-    {
-      /* acc =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
-        pxV = vld1q_f64(px);
-        pbV = vld1q_f64(pb);
-        acc0V = vmlaq_f64(acc0V, pxV, pbV);
-        px+=2;
-        pb+=2;
-
-      i--;
-    }
-      
-      acc0 = vaddvq_f64(acc0V);
-      i = numTaps%2 ;
-    while(i >0U)
-    {
-        acc0+= *px++ * *pb++ ;
-        i--;
-    }
-
-    /* Store result in destination buffer. */
-
-    *pDst++ = acc0;
-
-    /* Advance state pointer by 1 for the next sample */
-    pState = pState + 1U;
-
-    /* Decrement loop counter */
-    blkCnt--;
-  }
-
-  /* Processing is complete.
-     Now copy the last numTaps - 1 samples to the start of the state buffer.
-     This prepares the state buffer for the next function call. */
-
-  /* Points to the start of the state buffer */
-  pStateCurnt = S->pState;
-
-  /* Initialize tapCnt with number of taps */
-  tapCnt = (numTaps - 1U);
-
-  /* Copy remaining data */
-  while (tapCnt > 0U)
-  {
-    *pStateCurnt++ = *pState++;
-
-    /* Decrement loop counter */
-    tapCnt--;
-  }
-
+	float64_t *pState = S->pState;                 /* State pointer */
+	const float64_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
+	float64_t *pStateCurnt;                        /* Points to the current sample of the state */
+	float64_t *px;                                 /* Temporary pointer for state buffer */
+	const float64_t *pb;                                 /* Temporary pointer for coefficient buffer */
+	float64x2_t pxV;
+	float64x2_t pbV;
+	float64x2_t acc0V;
+	float64_t acc0;                                /* Accumulator */
+	uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
+	uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
+	
+	/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
+	/* pStateCurnt points to the location where the new input data should be written */
+	pStateCurnt = &(S->pState[(numTaps - 1U)]);
+	
+	/* Initialize blkCnt with number of taps */
+	blkCnt = blockSize;
+	
+	while (blkCnt > 0U)
+	{
+		/* Copy one sample at a time into state buffer */
+		*pStateCurnt++ = *pSrc++;
+		
+		/* Set the accumulator to zero */
+		acc0 = 0.;
+		acc0V = vdupq_n_f64(0.0f);
+		
+		/* Initialize state pointer */
+		px = pState;
+		
+		/* Initialize Coefficient pointer */
+		pb = pCoeffs;
+		
+		i = numTaps >> 1U;
+		
+		/* Perform the multiply-accumulates */
+		while (i > 0U)
+		{
+			/* acc =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
+			pxV = vld1q_f64(px);
+			pbV = vld1q_f64(pb);
+			acc0V = vmlaq_f64(acc0V, pxV, pbV);
+			px+=2;
+			pb+=2;
+			
+			i--;
+		}
+		
+		acc0 = vaddvq_f64(acc0V);
+		i = numTaps%2 ;
+		while(i >0U)
+		{
+			acc0+= *px++ * *pb++ ;
+			i--;
+		}
+		
+		/* Store result in destination buffer. */
+		
+		*pDst++ = acc0;
+		
+		/* Advance state pointer by 1 for the next sample */
+		pState = pState + 1U;
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	/* Processing is complete.
+	 Now copy the last numTaps - 1 samples to the start of the state buffer.
+	 This prepares the state buffer for the next function call. */
+	
+	/* Points to the start of the state buffer */
+	pStateCurnt = S->pState;
+	
+	/* Initialize tapCnt with number of taps */
+	tapCnt = (numTaps - 1U);
+	
+	/* Copy remaining data */
+	while (tapCnt > 0U)
+	{
+		*pStateCurnt++ = *pState++;
+		
+		/* Decrement loop counter */
+		tapCnt--;
+	}
+	
 }

 #else
 void arm_fir_f64(
-  const arm_fir_instance_f64 * S,
-  const float64_t * pSrc,
-        float64_t * pDst,
-        uint32_t blockSize)
+	const arm_fir_instance_f64 * S,
+	const float64_t * pSrc,
+	float64_t * pDst,
+	uint32_t blockSize)
 {
-        float64_t *pState = S->pState;                 /* State pointer */
-  const float64_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
-        float64_t *pStateCurnt;                        /* Points to the current sample of the state */
-        float64_t *px;                                 /* Temporary pointer for state buffer */
-  const float64_t *pb;                                 /* Temporary pointer for coefficient buffer */
-        float64_t acc0;                                /* Accumulator */
-        uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
-        uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
-
-  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
-  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = &(S->pState[(numTaps - 1U)]);
-
-  /* Initialize blkCnt with number of taps */
-  blkCnt = blockSize;
-
-  while (blkCnt > 0U)
-  {
-    /* Copy one sample at a time into state buffer */
-    *pStateCurnt++ = *pSrc++;
-
-    /* Set the accumulator to zero */
-    acc0 = 0.;
-
-    /* Initialize state pointer */
-    px = pState;
-
-    /* Initialize Coefficient pointer */
-    pb = pCoeffs;
-
-    i = numTaps;
-
-    /* Perform the multiply-accumulates */
-    while (i > 0U)
-    {
-      /* acc =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
-      acc0 += *px++ * *pb++;
-
-      i--;
-    }
-
-    /* Store result in destination buffer. */
-    *pDst++ = acc0;
-
-    /* Advance state pointer by 1 for the next sample */
-    pState = pState + 1U;
-
-    /* Decrement loop counter */
-    blkCnt--;
-  }
-
-  /* Processing is complete.
-     Now copy the last numTaps - 1 samples to the start of the state buffer.
-     This prepares the state buffer for the next function call. */
-
-  /* Points to the start of the state buffer */
-  pStateCurnt = S->pState;
-
-  /* Initialize tapCnt with number of taps */
-  tapCnt = (numTaps - 1U);
-
-  /* Copy remaining data */
-  while (tapCnt > 0U)
-  {
-    *pStateCurnt++ = *pState++;
-
-    /* Decrement loop counter */
-    tapCnt--;
-  }
-
+	float64_t *pState = S->pState;                 /* State pointer */
+	const float64_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
+	float64_t *pStateCurnt;                        /* Points to the current sample of the state */
+	float64_t *px;                                 /* Temporary pointer for state buffer */
+	const float64_t *pb;                                 /* Temporary pointer for coefficient buffer */
+	float64_t acc0;                                /* Accumulator */
+	uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
+	uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
+	
+	/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
+	/* pStateCurnt points to the location where the new input data should be written */
+	pStateCurnt = &(S->pState[(numTaps - 1U)]);
+	
+	/* Initialize blkCnt with number of taps */
+	blkCnt = blockSize;
+	
+	while (blkCnt > 0U)
+	{
+		/* Copy one sample at a time into state buffer */
+		*pStateCurnt++ = *pSrc++;
+		
+		/* Set the accumulator to zero */
+		acc0 = 0.;
+		
+		/* Initialize state pointer */
+		px = pState;
+		
+		/* Initialize Coefficient pointer */
+		pb = pCoeffs;
+		
+		i = numTaps;
+		
+		/* Perform the multiply-accumulates */
+		while (i > 0U)
+		{
+			/* acc =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
+			acc0 += *px++ * *pb++;
+			
+			i--;
+		}
+		
+		/* Store result in destination buffer. */
+		*pDst++ = acc0;
+		
+		/* Advance state pointer by 1 for the next sample */
+		pState = pState + 1U;
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	/* Processing is complete.
+	 Now copy the last numTaps - 1 samples to the start of the state buffer.
+	 This prepares the state buffer for the next function call. */
+	
+	/* Points to the start of the state buffer */
+	pStateCurnt = S->pState;
+	
+	/* Initialize tapCnt with number of taps */
+	tapCnt = (numTaps - 1U);
+	
+	/* Copy remaining data */
+	while (tapCnt > 0U)
+	{
+		*pStateCurnt++ = *pState++;
+		
+		/* Decrement loop counter */
+		tapCnt--;
+	}
+	
 }
 #endif

--- a/Source/FilteringFunctions/arm_fir_init_f64.c
+++ b/Source/FilteringFunctions/arm_fir_init_f64.c
@ -61,22 +61,22 @@
 */

 void arm_fir_init_f64(
-        arm_fir_instance_f64 * S,
-        uint16_t numTaps,
-  const float64_t * pCoeffs,
-        float64_t * pState,
-        uint32_t blockSize)
+	arm_fir_instance_f64 * S,
+	uint16_t numTaps,
+	const float64_t * pCoeffs,
+	float64_t * pState,
+	uint32_t blockSize)
 {
-  /* Assign filter taps */
-  S->numTaps = numTaps;
-
-  /* Assign coefficient pointer */
-  S->pCoeffs = pCoeffs;
-
-  /* Clear state buffer. The size is always (blockSize + numTaps - 1) */
-  memset(pState, 0, (numTaps + (blockSize - 1U)) * sizeof(float64_t));
-  /* Assign state pointer */
-  S->pState = pState;
+	/* Assign filter taps */
+	S->numTaps = numTaps;
+	
+	/* Assign coefficient pointer */
+	S->pCoeffs = pCoeffs;
+	
+	/* Clear state buffer. The size is always (blockSize + numTaps - 1) */
+	memset(pState, 0, (numTaps + (blockSize - 1U)) * sizeof(float64_t));
+	/* Assign state pointer */
+	S->pState = pState;
 }

 /**
--- a/Source/MatrixFunctions/arm_mat_cholesky_f64.c
+++ b/Source/MatrixFunctions/arm_mat_cholesky_f64.c
@ -55,223 +55,223 @@
 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)

 arm_status arm_mat_cholesky_f64(
-  const arm_matrix_instance_f64 * pSrc,
-        arm_matrix_instance_f64 * pDst)
+	const arm_matrix_instance_f64 * pSrc,
+	arm_matrix_instance_f64 * pDst)
 {
-
-  arm_status status;                             /* status of matrix inverse */
-
-
+	
+	arm_status status;                             /* status of matrix inverse */
+	
+	
 #ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((pSrc->numRows != pSrc->numCols) ||
-      (pDst->numRows != pDst->numCols) ||
-      (pSrc->numRows != pDst->numRows)   )
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-
+	
+	/* Check for matrix mismatch condition */
+	if ((pSrc->numRows != pSrc->numCols) ||
+		(pDst->numRows != pDst->numCols) ||
+		(pSrc->numRows != pDst->numRows)   )
+	{
+		/* Set status as ARM_MATH_SIZE_MISMATCH */
+		status = ARM_MATH_SIZE_MISMATCH;
+	}
+	else
+		
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
-
-  {
-    int i,j,k;
-    int n = pSrc->numRows;
-    float64_t invSqrtVj;
-    float64_t *pA,*pG;
-    int kCnt;
-
-
-    float64x2_t acc, acc0, acc1, acc2, acc3;
-    float64x2_t vecGi;
-    float64x2_t vecGj,vecGj0,vecGj1,vecGj2,vecGj3;
-
-    float64x2_t tmp = vdupq_n_f64(0.0f);
-
-    float64_t sum=0.0f;
-    float64_t sum0=0.0f,sum1=0.0f,sum2=0.0f,sum3=0.0f;
-
-
-    pA = pSrc->pData;
-    pG = pDst->pData;
-    
-    for(i=0 ;i < n ; i++)
-    {
-       for(j=i ; j+3 < n ; j+=4)
-       {
-          pG[(j + 0) * n + i] = pA[(j + 0) * n + i];
-          pG[(j + 1) * n + i] = pA[(j + 1) * n + i];
-          pG[(j + 2) * n + i] = pA[(j + 2) * n + i];
-          pG[(j + 3) * n + i] = pA[(j + 3) * n + i];
-
-          acc0 = vdupq_n_f64(0.0f);
-          acc1 = vdupq_n_f64(0.0f);
-          acc2 = vdupq_n_f64(0.0f);
-          acc3 = vdupq_n_f64(0.0f);
-
-          kCnt = i >> 1U;
-          k=0;
-          while(kCnt > 0)
-          {
-
-             vecGi=vld1q_f64(&pG[i * n + k]);
-             
-             vecGj0=vld1q_f64(&pG[(j + 0) * n + k]);
-             vecGj1=vld1q_f64(&pG[(j + 1) * n + k]);
-             vecGj2=vld1q_f64(&pG[(j + 2) * n + k]);
-             vecGj3=vld1q_f64(&pG[(j + 3) * n + k]);
-
-             acc0 = vfmaq_f64(acc0, vecGi, vecGj0);
-             acc1 = vfmaq_f64(acc1, vecGi, vecGj1);
-             acc2 = vfmaq_f64(acc2, vecGi, vecGj2);
-             acc3 = vfmaq_f64(acc3, vecGi, vecGj3);
-
-             kCnt--;
-             k+=2;
-          }
-
-
-         sum0 = vaddvq_f64(acc0);
-         sum1 = vaddvq_f64(acc1);
-         sum2 = vaddvq_f64(acc2);
-         sum3 = vaddvq_f64(acc3);
-
-
-          kCnt = i & 1;
-          while(kCnt > 0)
-          {
-
-             sum0 = sum0 + pG[i * n + k] * pG[(j + 0) * n + k];
-             sum1 = sum1 + pG[i * n + k] * pG[(j + 1) * n + k];
-             sum2 = sum2 + pG[i * n + k] * pG[(j + 2) * n + k];
-             sum3 = sum3 + pG[i * n + k] * pG[(j + 3) * n + k];
-             kCnt--;
-             k++;
-          }
-
-          pG[(j + 0) * n + i] -= sum0;
-          pG[(j + 1) * n + i] -= sum1;
-          pG[(j + 2) * n + i] -= sum2;
-          pG[(j + 3) * n + i] -= sum3;
-       }
-
-       for(; j < n ; j++)
-       {
-          pG[j * n + i] = pA[j * n + i];
-
-          acc = vdupq_n_f64(0.0f);
-
-          kCnt = i >> 1U;
-          k=0;
-          while(kCnt > 0)
-          {
-
-             vecGi=vld1q_f64(&pG[i * n + k]);
-             vecGj=vld1q_f64(&pG[j * n + k]);
-
-             acc = vfmaq_f64(acc, vecGi, vecGj);
-
-             kCnt--;
-             k+=2;
-          }
-
-
-         sum = vaddvq_f64(acc);
-
-          kCnt = i & 1;
-          while(kCnt > 0)
-          {
-             sum = sum + pG[i * n + k] * pG[(j + 0) * n + k];
-
-            
-             kCnt--;
-             k++;
-          }
-
-          pG[j * n + i] -= sum;
-       }
-
-       if (pG[i * n + i] <= 0.0f)
-       {
-         return(ARM_MATH_DECOMPOSITION_FAILURE);
-       }
-
-       invSqrtVj = 1.0f/sqrtf(pG[i * n + i]);
-       SCALE_COL_F64(pDst,i,invSqrtVj,i);
-    }
-
-    status = ARM_MATH_SUCCESS;
-
-  }
-
-  
-  /* Return to application */
-  return (status);
+		
+	{
+		int i,j,k;
+		int n = pSrc->numRows;
+		float64_t invSqrtVj;
+		float64_t *pA,*pG;
+		int kCnt;
+		
+		
+		float64x2_t acc, acc0, acc1, acc2, acc3;
+		float64x2_t vecGi;
+		float64x2_t vecGj,vecGj0,vecGj1,vecGj2,vecGj3;
+		
+		float64x2_t tmp = vdupq_n_f64(0.0f);
+		
+		float64_t sum=0.0f;
+		float64_t sum0=0.0f,sum1=0.0f,sum2=0.0f,sum3=0.0f;
+		
+		
+		pA = pSrc->pData;
+		pG = pDst->pData;
+		
+		for(i=0 ;i < n ; i++)
+		{
+			for(j=i ; j+3 < n ; j+=4)
+			{
+				pG[(j + 0) * n + i] = pA[(j + 0) * n + i];
+				pG[(j + 1) * n + i] = pA[(j + 1) * n + i];
+				pG[(j + 2) * n + i] = pA[(j + 2) * n + i];
+				pG[(j + 3) * n + i] = pA[(j + 3) * n + i];
+				
+				acc0 = vdupq_n_f64(0.0f);
+				acc1 = vdupq_n_f64(0.0f);
+				acc2 = vdupq_n_f64(0.0f);
+				acc3 = vdupq_n_f64(0.0f);
+				
+				kCnt = i >> 1U;
+				k=0;
+				while(kCnt > 0)
+				{
+					
+					vecGi=vld1q_f64(&pG[i * n + k]);
+					
+					vecGj0=vld1q_f64(&pG[(j + 0) * n + k]);
+					vecGj1=vld1q_f64(&pG[(j + 1) * n + k]);
+					vecGj2=vld1q_f64(&pG[(j + 2) * n + k]);
+					vecGj3=vld1q_f64(&pG[(j + 3) * n + k]);
+					
+					acc0 = vfmaq_f64(acc0, vecGi, vecGj0);
+					acc1 = vfmaq_f64(acc1, vecGi, vecGj1);
+					acc2 = vfmaq_f64(acc2, vecGi, vecGj2);
+					acc3 = vfmaq_f64(acc3, vecGi, vecGj3);
+					
+					kCnt--;
+					k+=2;
+				}
+				
+				
+				sum0 = vaddvq_f64(acc0);
+				sum1 = vaddvq_f64(acc1);
+				sum2 = vaddvq_f64(acc2);
+				sum3 = vaddvq_f64(acc3);
+				
+				
+				kCnt = i & 1;
+				while(kCnt > 0)
+				{
+					
+					sum0 = sum0 + pG[i * n + k] * pG[(j + 0) * n + k];
+					sum1 = sum1 + pG[i * n + k] * pG[(j + 1) * n + k];
+					sum2 = sum2 + pG[i * n + k] * pG[(j + 2) * n + k];
+					sum3 = sum3 + pG[i * n + k] * pG[(j + 3) * n + k];
+					kCnt--;
+					k++;
+				}
+				
+				pG[(j + 0) * n + i] -= sum0;
+				pG[(j + 1) * n + i] -= sum1;
+				pG[(j + 2) * n + i] -= sum2;
+				pG[(j + 3) * n + i] -= sum3;
+			}
+			
+			for(; j < n ; j++)
+			{
+				pG[j * n + i] = pA[j * n + i];
+				
+				acc = vdupq_n_f64(0.0f);
+				
+				kCnt = i >> 1U;
+				k=0;
+				while(kCnt > 0)
+				{
+					
+					vecGi=vld1q_f64(&pG[i * n + k]);
+					vecGj=vld1q_f64(&pG[j * n + k]);
+					
+					acc = vfmaq_f64(acc, vecGi, vecGj);
+					
+					kCnt--;
+					k+=2;
+				}
+				
+				
+				sum = vaddvq_f64(acc);
+				
+				kCnt = i & 1;
+				while(kCnt > 0)
+				{
+					sum = sum + pG[i * n + k] * pG[(j + 0) * n + k];
+					
+					
+					kCnt--;
+					k++;
+				}
+				
+				pG[j * n + i] -= sum;
+			}
+			
+			if (pG[i * n + i] <= 0.0f)
+			{
+				return(ARM_MATH_DECOMPOSITION_FAILURE);
+			}
+			
+			invSqrtVj = 1.0f/sqrtf(pG[i * n + i]);
+			SCALE_COL_F64(pDst,i,invSqrtVj,i);
+		}
+		
+		status = ARM_MATH_SUCCESS;
+		
+	}
+	
+	
+	/* Return to application */
+	return (status);
 }
 #else
 arm_status arm_mat_cholesky_f64(
-  const arm_matrix_instance_f64 * pSrc,
-        arm_matrix_instance_f64 * pDst)
+								const arm_matrix_instance_f64 * pSrc,
+								arm_matrix_instance_f64 * pDst)
 {
-
-  arm_status status;                             /* status of matrix inverse */
-
-
+	
+	arm_status status;                             /* status of matrix inverse */
+	
+	
 #ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((pSrc->numRows != pSrc->numCols) ||
-      (pDst->numRows != pDst->numCols) ||
-      (pSrc->numRows != pDst->numRows)   )
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-
+	
+	/* Check for matrix mismatch condition */
+	if ((pSrc->numRows != pSrc->numCols) ||
+		(pDst->numRows != pDst->numCols) ||
+		(pSrc->numRows != pDst->numRows)   )
+	{
+		/* Set status as ARM_MATH_SIZE_MISMATCH */
+		status = ARM_MATH_SIZE_MISMATCH;
+	}
+	else
+		
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
-
-  {
-    int i,j,k;
-    int n = pSrc->numRows;
-    float64_t invSqrtVj;
-    float64_t *pA,*pG;
-
-    pA = pSrc->pData;
-    pG = pDst->pData;
-    
-
-    for(i=0 ; i < n ; i++)
-    {
-       for(j=i ; j < n ; j++)
-       {
-          pG[j * n + i] = pA[j * n + i];
-
-          for(k=0; k < i ; k++)
-          {
-             pG[j * n + i] = pG[j * n + i] - pG[i * n + k] * pG[j * n + k];
-          }
-       }
-
-       if (pG[i * n + i] <= 0.0)
-       {
-         return(ARM_MATH_DECOMPOSITION_FAILURE);
-       }
-
-       invSqrtVj = 1.0/sqrt(pG[i * n + i]);
-       SCALE_COL_F64(pDst,i,invSqrtVj,i);
-
-    }
-
-    status = ARM_MATH_SUCCESS;
-
-  }
-
-  
-  /* Return to application */
-  return (status);
+		
+	{
+		int i,j,k;
+		int n = pSrc->numRows;
+		float64_t invSqrtVj;
+		float64_t *pA,*pG;
+		
+		pA = pSrc->pData;
+		pG = pDst->pData;
+		
+		
+		for(i=0 ; i < n ; i++)
+		{
+			for(j=i ; j < n ; j++)
+			{
+				pG[j * n + i] = pA[j * n + i];
+				
+				for(k=0; k < i ; k++)
+				{
+					pG[j * n + i] = pG[j * n + i] - pG[i * n + k] * pG[j * n + k];
+				}
+			}
+			
+			if (pG[i * n + i] <= 0.0)
+			{
+				return(ARM_MATH_DECOMPOSITION_FAILURE);
+			}
+			
+			invSqrtVj = 1.0/sqrt(pG[i * n + i]);
+			SCALE_COL_F64(pDst,i,invSqrtVj,i);
+			
+		}
+		
+		status = ARM_MATH_SUCCESS;
+		
+	}
+	
+	
+	/* Return to application */
+	return (status);
 }
 #endif

--- a/Source/MatrixFunctions/arm_mat_init_f64.c
+++ b/Source/MatrixFunctions/arm_mat_init_f64.c
@ -56,19 +56,19 @@
 */

 void arm_mat_init_f64(
-  arm_matrix_instance_f64 * S,
-  uint16_t nRows,
-  uint16_t nColumns,
-  float64_t * pData)
+	arm_matrix_instance_f64 * S,
+	uint16_t nRows,
+	uint16_t nColumns,
+	float64_t * pData)
 {
-  /* Assign Number of Rows */
-  S->numRows = nRows;
-
-  /* Assign Number of Columns */
-  S->numCols = nColumns;
-
-  /* Assign Data pointer */
-  S->pData = pData;
+	/* Assign Number of Rows */
+	S->numRows = nRows;
+	
+	/* Assign Number of Columns */
+	S->numCols = nColumns;
+	
+	/* Assign Data pointer */
+	S->pData = pData;
 }

 /**
--- a/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c
+++ b/Source/MatrixFunctions/arm_mat_solve_lower_triangular_f64.c
@ -48,179 +48,179 @@
   */

 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
-  arm_status arm_mat_solve_lower_triangular_f64(
-  const arm_matrix_instance_f64 * lt,
-  const arm_matrix_instance_f64 * a,
-  arm_matrix_instance_f64 * dst)
-  {
-  arm_status status;                             /* status of matrix inverse */
-
-
+arm_status arm_mat_solve_lower_triangular_f64(
+	const arm_matrix_instance_f64 * lt,
+	const arm_matrix_instance_f64 * a,
+	arm_matrix_instance_f64 * dst)
+{
+	arm_status status;                             /* status of matrix inverse */
+	
+	
 #ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((lt->numRows != lt->numCols) ||
-      (lt->numRows != a->numRows)   )
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-
+	
+	/* Check for matrix mismatch condition */
+	if ((lt->numRows != lt->numCols) ||
+		(lt->numRows != a->numRows)   )
+	{
+		/* Set status as ARM_MATH_SIZE_MISMATCH */
+		status = ARM_MATH_SIZE_MISMATCH;
+	}
+	else
+		
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
-
-  {
-    /* a1 b1 c1   x1 = a1
-          b2 c2   x2   a2
-             c3   x3   a3
-
-    x3 = a3 / c3
-    x2 = (a2 - c2 x3) / b2
-
-    */
-    int i,j,k,n,cols;
-
-    n = dst->numRows;
-    cols = dst->numCols;
-
-    float64_t *pX = dst->pData;
-    float64_t *pLT = lt->pData;
-    float64_t *pA = a->pData;
-
-    float64_t *lt_row;
-    float64_t *a_col;
-
-    float64_t invLT;
-
-    float64x2_t vecA;
-    float64x2_t vecX;
-
-    for(i=0; i < n ; i++)
-    {
-
-      for(j=0; j+1 < cols; j += 2)
-      {
-            vecA = vld1q_f64(&pA[i * cols + j]);
-
-            for(k=0; k < i; k++)
-            {
-                vecX = vld1q_f64(&pX[cols*k+j]);
-                vecA = vfmsq_f64(vecA,vdupq_n_f64(pLT[n*i + k]),vecX);
-            }
-
-            if (pLT[n*i + i]==0.0f)
-            {
-              return(ARM_MATH_SINGULAR);
-            }
-
-            invLT = 1.0f / pLT[n*i + i];
-            vecA = vmulq_f64(vecA,vdupq_n_f64(invLT));
-            vst1q_f64(&pX[i*cols+j],vecA);
-
-       }
-
-       for(; j < cols; j ++)
-       {
-            a_col = &pA[j];
-            lt_row = &pLT[n*i];
-
-            float64_t tmp=a_col[i * cols];
-            
-            for(k=0; k < i; k++)
-            {
-                tmp -= lt_row[k] * pX[cols*k+j];
-            }
-
-            if (lt_row[i]==0.0f)
-            {
-              return(ARM_MATH_SINGULAR);
-            }
-            tmp = tmp / lt_row[i];
-            pX[i*cols+j] = tmp;
-        }
-
-    }
-    status = ARM_MATH_SUCCESS;
-
-  }
-
-  /* Return to application */
-  return (status);
+		
+	{
+		/* a1 b1 c1   x1 = a1
+		 b2 c2   x2   a2
+		 c3   x3   a3
+		 
+		 x3 = a3 / c3
+		 x2 = (a2 - c2 x3) / b2
+		 
+		 */
+		int i,j,k,n,cols;
+		
+		n = dst->numRows;
+		cols = dst->numCols;
+		
+		float64_t *pX = dst->pData;
+		float64_t *pLT = lt->pData;
+		float64_t *pA = a->pData;
+		
+		float64_t *lt_row;
+		float64_t *a_col;
+		
+		float64_t invLT;
+		
+		float64x2_t vecA;
+		float64x2_t vecX;
+		
+		for(i=0; i < n ; i++)
+		{
+			
+			for(j=0; j+1 < cols; j += 2)
+			{
+				vecA = vld1q_f64(&pA[i * cols + j]);
+				
+				for(k=0; k < i; k++)
+				{
+					vecX = vld1q_f64(&pX[cols*k+j]);
+					vecA = vfmsq_f64(vecA,vdupq_n_f64(pLT[n*i + k]),vecX);
+				}
+				
+				if (pLT[n*i + i]==0.0f)
+				{
+					return(ARM_MATH_SINGULAR);
+				}
+				
+				invLT = 1.0f / pLT[n*i + i];
+				vecA = vmulq_f64(vecA,vdupq_n_f64(invLT));
+				vst1q_f64(&pX[i*cols+j],vecA);
+				
+			}
+			
+			for(; j < cols; j ++)
+			{
+				a_col = &pA[j];
+				lt_row = &pLT[n*i];
+				
+				float64_t tmp=a_col[i * cols];
+				
+				for(k=0; k < i; k++)
+				{
+					tmp -= lt_row[k] * pX[cols*k+j];
+				}
+				
+				if (lt_row[i]==0.0f)
+				{
+					return(ARM_MATH_SINGULAR);
+				}
+				tmp = tmp / lt_row[i];
+				pX[i*cols+j] = tmp;
+			}
+			
+		}
+		status = ARM_MATH_SUCCESS;
+		
+	}
+	
+	/* Return to application */
+	return (status);
 }

 #else
-  arm_status arm_mat_solve_lower_triangular_f64(
-  const arm_matrix_instance_f64 * lt,
-  const arm_matrix_instance_f64 * a,
-  arm_matrix_instance_f64 * dst)
-  {
-  arm_status status;                             /* status of matrix inverse */
-
-
+arm_status arm_mat_solve_lower_triangular_f64(
+	const arm_matrix_instance_f64 * lt,
+	const arm_matrix_instance_f64 * a,
+	arm_matrix_instance_f64 * dst)
+{
+	arm_status status;                             /* status of matrix inverse */
+	
+	
 #ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((lt->numRows != lt->numCols) ||
-      (lt->numRows != a->numRows)   )
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-
+	
+	/* Check for matrix mismatch condition */
+	if ((lt->numRows != lt->numCols) ||
+		(lt->numRows != a->numRows)   )
+	{
+		/* Set status as ARM_MATH_SIZE_MISMATCH */
+		status = ARM_MATH_SIZE_MISMATCH;
+	}
+	else
+		
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
-
-  {
-    /* a1 b1 c1   x1 = a1
-          b2 c2   x2   a2
-             c3   x3   a3
-
-    x3 = a3 / c3 
-    x2 = (a2 - c2 x3) / b2
-
-    */
-    int i,j,k,n,cols;
-
-    float64_t *pX = dst->pData;
-    float64_t *pLT = lt->pData;
-    float64_t *pA = a->pData;
-
-    float64_t *lt_row;
-    float64_t *a_col;
-
-    n = dst->numRows;
-    cols = dst->numCols;
-
-    for(j=0; j < cols; j ++)
-    {
-       a_col = &pA[j];
-
-       for(i=0; i < n ; i++)
-       {
-            float64_t tmp=a_col[i * cols];
-
-            lt_row = &pLT[n*i];
-
-            for(k=0; k < i; k++)
-            {
-                tmp -= lt_row[k] * pX[cols*k+j];
-            }
-
-            if (lt_row[i]==0.0)
-            {
-              return(ARM_MATH_SINGULAR);
-            }
-            tmp = tmp / lt_row[i];
-            pX[i*cols+j] = tmp;
-       }
-
-    }
-    status = ARM_MATH_SUCCESS;
-
-  }
-
-  /* Return to application */
-  return (status);
+		
+	{
+		/* a1 b1 c1   x1 = a1
+		 b2 c2   x2   a2
+		 c3   x3   a3
+		 
+		 x3 = a3 / c3
+		 x2 = (a2 - c2 x3) / b2
+		 
+		 */
+		int i,j,k,n,cols;
+		
+		float64_t *pX = dst->pData;
+		float64_t *pLT = lt->pData;
+		float64_t *pA = a->pData;
+		
+		float64_t *lt_row;
+		float64_t *a_col;
+		
+		n = dst->numRows;
+		cols = dst->numCols;
+		
+		for(j=0; j < cols; j ++)
+		{
+			a_col = &pA[j];
+			
+			for(i=0; i < n ; i++)
+			{
+				float64_t tmp=a_col[i * cols];
+				
+				lt_row = &pLT[n*i];
+				
+				for(k=0; k < i; k++)
+				{
+					tmp -= lt_row[k] * pX[cols*k+j];
+				}
+				
+				if (lt_row[i]==0.0)
+				{
+					return(ARM_MATH_SINGULAR);
+				}
+				tmp = tmp / lt_row[i];
+				pX[i*cols+j] = tmp;
+			}
+			
+		}
+		status = ARM_MATH_SUCCESS;
+		
+	}
+	
+	/* Return to application */
+	return (status);
 }
 #endif
 /**
--- a/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c
+++ b/Source/MatrixFunctions/arm_mat_solve_upper_triangular_f64.c
@ -48,168 +48,168 @@
  */

 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
-  arm_status arm_mat_solve_upper_triangular_f64(
-  const arm_matrix_instance_f64 * ut,
-  const arm_matrix_instance_f64 * a,
-  arm_matrix_instance_f64 * dst)
-  {
-arm_status status;                             /* status of matrix inverse */
-
-
+arm_status arm_mat_solve_upper_triangular_f64(
+	const arm_matrix_instance_f64 * ut,
+	const arm_matrix_instance_f64 * a,
+	arm_matrix_instance_f64 * dst)
+{
+	arm_status status;                             /* status of matrix inverse */
+	
+	
 #ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((ut->numRows != ut->numCols) ||
-      (ut->numRows != a->numRows)   )
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-
+	
+	/* Check for matrix mismatch condition */
+	if ((ut->numRows != ut->numCols) ||
+		(ut->numRows != a->numRows)   )
+	{
+		/* Set status as ARM_MATH_SIZE_MISMATCH */
+		status = ARM_MATH_SIZE_MISMATCH;
+	}
+	else
+		
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
-
-  {
-
-    int i,j,k,n,cols;
-
-    n = dst->numRows;
-    cols = dst->numCols;
-
-    float64_t *pX = dst->pData;
-    float64_t *pUT = ut->pData;
-    float64_t *pA = a->pData;
-
-    float64_t *ut_row;
-    float64_t *a_col;
-
-    float64_t invUT;
-
-    float64x2_t vecA;
-    float64x2_t vecX;
-    
-    for(i=n-1; i >= 0 ; i--)
-    {
-      for(j=0; j+1 < cols; j +=2)
-      {
-            vecA = vld1q_f64(&pA[i * cols + j]);
-            
-            for(k=n-1; k > i; k--)
-            {
-                vecX = vld1q_f64(&pX[cols*k+j]);
-                vecA = vfmsq_f64(vecA,vdupq_n_f64(pUT[n*i + k]),vecX);
-            }
-
-            if (pUT[n*i + i]==0.0f)
-            {
-              return(ARM_MATH_SINGULAR);
-            }
-
-            invUT = 1.0f / pUT[n*i + i];
-            vecA = vmulq_f64(vecA,vdupq_n_f64(invUT));
-           
-
-            vst1q_f64(&pX[i*cols+j],vecA);
-      }
-
-      for(; j < cols; j ++)
-      {
-            a_col = &pA[j];
-
-            ut_row = &pUT[n*i];
-
-            float64_t tmp=a_col[i * cols];
-            
-            for(k=n-1; k > i; k--)
-            {
-                tmp -= ut_row[k] * pX[cols*k+j];
-            }
-
-            if (ut_row[i]==0.0f)
-            {
-              return(ARM_MATH_SINGULAR);
-            }
-            tmp = tmp / ut_row[i];
-            pX[i*cols+j] = tmp;
-       }
-
-    }
-    status = ARM_MATH_SUCCESS;
-
-  }
-
-  
-  /* Return to application */
-  return (status);
+		
+	{
+		
+		int i,j,k,n,cols;
+		
+		n = dst->numRows;
+		cols = dst->numCols;
+		
+		float64_t *pX = dst->pData;
+		float64_t *pUT = ut->pData;
+		float64_t *pA = a->pData;
+		
+		float64_t *ut_row;
+		float64_t *a_col;
+		
+		float64_t invUT;
+		
+		float64x2_t vecA;
+		float64x2_t vecX;
+		
+		for(i=n-1; i >= 0 ; i--)
+		{
+			for(j=0; j+1 < cols; j +=2)
+			{
+				vecA = vld1q_f64(&pA[i * cols + j]);
+				
+				for(k=n-1; k > i; k--)
+				{
+					vecX = vld1q_f64(&pX[cols*k+j]);
+					vecA = vfmsq_f64(vecA,vdupq_n_f64(pUT[n*i + k]),vecX);
+				}
+				
+				if (pUT[n*i + i]==0.0f)
+				{
+					return(ARM_MATH_SINGULAR);
+				}
+				
+				invUT = 1.0f / pUT[n*i + i];
+				vecA = vmulq_f64(vecA,vdupq_n_f64(invUT));
+				
+				
+				vst1q_f64(&pX[i*cols+j],vecA);
+			}
+			
+			for(; j < cols; j ++)
+			{
+				a_col = &pA[j];
+				
+				ut_row = &pUT[n*i];
+				
+				float64_t tmp=a_col[i * cols];
+				
+				for(k=n-1; k > i; k--)
+				{
+					tmp -= ut_row[k] * pX[cols*k+j];
+				}
+				
+				if (ut_row[i]==0.0f)
+				{
+					return(ARM_MATH_SINGULAR);
+				}
+				tmp = tmp / ut_row[i];
+				pX[i*cols+j] = tmp;
+			}
+			
+		}
+		status = ARM_MATH_SUCCESS;
+		
+	}
+	
+	
+	/* Return to application */
+	return (status);
 }

 #else
-  arm_status arm_mat_solve_upper_triangular_f64(
-  const arm_matrix_instance_f64 * ut,
-  const arm_matrix_instance_f64 * a,
-  arm_matrix_instance_f64 * dst)
-  {
-arm_status status;                             /* status of matrix inverse */
-
-
+arm_status arm_mat_solve_upper_triangular_f64(
+	const arm_matrix_instance_f64 * ut,
+	const arm_matrix_instance_f64 * a,
+	arm_matrix_instance_f64 * dst)
+{
+	arm_status status;                             /* status of matrix inverse */
+	
+	
 #ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((ut->numRows != ut->numCols) ||
-      (ut->numRows != a->numRows)   )
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-
+	
+	/* Check for matrix mismatch condition */
+	if ((ut->numRows != ut->numCols) ||
+		(ut->numRows != a->numRows)   )
+	{
+		/* Set status as ARM_MATH_SIZE_MISMATCH */
+		status = ARM_MATH_SIZE_MISMATCH;
+	}
+	else
+		
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
-
-  {
-
-    int i,j,k,n,cols;
-
-    float64_t *pX = dst->pData;
-    float64_t *pUT = ut->pData;
-    float64_t *pA = a->pData;
-
-    float64_t *ut_row;
-    float64_t *a_col;
-
-    n = dst->numRows;
-    cols = dst->numCols;
-
-    for(j=0; j < cols; j ++)
-    {
-       a_col = &pA[j];
-
-       for(i=n-1; i >= 0 ; i--)
-       {
-            float64_t tmp=a_col[i * cols];
-
-            ut_row = &pUT[n*i];
-
-            for(k=n-1; k > i; k--)
-            {
-                tmp -= ut_row[k] * pX[cols*k+j];
-            }
-
-            if (ut_row[i]==0.0)
-            {
-              return(ARM_MATH_SINGULAR);
-            }
-            tmp = tmp / ut_row[i];
-            pX[i*cols+j] = tmp;
-       }
-
-    }
-    status = ARM_MATH_SUCCESS;
-
-  }
-
-  
-  /* Return to application */
-  return (status);
+		
+	{
+		
+		int i,j,k,n,cols;
+		
+		float64_t *pX = dst->pData;
+		float64_t *pUT = ut->pData;
+		float64_t *pA = a->pData;
+		
+		float64_t *ut_row;
+		float64_t *a_col;
+		
+		n = dst->numRows;
+		cols = dst->numCols;
+		
+		for(j=0; j < cols; j ++)
+		{
+			a_col = &pA[j];
+			
+			for(i=n-1; i >= 0 ; i--)
+			{
+				float64_t tmp=a_col[i * cols];
+				
+				ut_row = &pUT[n*i];
+				
+				for(k=n-1; k > i; k--)
+				{
+					tmp -= ut_row[k] * pX[cols*k+j];
+				}
+				
+				if (ut_row[i]==0.0)
+				{
+					return(ARM_MATH_SINGULAR);
+				}
+				tmp = tmp / ut_row[i];
+				pX[i*cols+j] = tmp;
+			}
+			
+		}
+		status = ARM_MATH_SUCCESS;
+		
+	}
+	
+	
+	/* Return to application */
+	return (status);
 }
 #endif

--- a/Source/MatrixFunctions/arm_mat_trans_f64.c
+++ b/Source/MatrixFunctions/arm_mat_trans_f64.c
@ -57,216 +57,216 @@
 #if defined(ARM_MATH_NEON)

 arm_status arm_mat_trans_f64(
-  const arm_matrix_instance_f64 * pSrc,
-  arm_matrix_instance_f64 * pDst)
+	const arm_matrix_instance_f64 * pSrc,
+	arm_matrix_instance_f64 * pDst)
 {
-  float64_t *pIn = pSrc->pData;                  /* input data matrix pointer */
-  float64_t *pOut = pDst->pData;                 /* output data matrix pointer */
-  float64_t *px;                                 /* Temporary output data matrix pointer */
-  uint16_t nRows = pSrc->numRows;                /* number of rows */
-  uint16_t nColumns = pSrc->numCols;             /* number of columns */
-
-  uint16_t blkCnt, rowCnt, i = 0U, row = nRows;          /* loop counters */
-  arm_status status;                             /* status of matrix transpose  */
-
+	float64_t *pIn = pSrc->pData;                  /* input data matrix pointer */
+	float64_t *pOut = pDst->pData;                 /* output data matrix pointer */
+	float64_t *px;                                 /* Temporary output data matrix pointer */
+	uint16_t nRows = pSrc->numRows;                /* number of rows */
+	uint16_t nColumns = pSrc->numCols;             /* number of columns */
+	
+	uint16_t blkCnt, rowCnt, i = 0U, row = nRows;          /* loop counters */
+	arm_status status;                             /* status of matrix transpose  */
+	
 #ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
+	
+	/* Check for matrix mismatch condition */
+	if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
+	{
+		/* Set status as ARM_MATH_SIZE_MISMATCH */
+		status = ARM_MATH_SIZE_MISMATCH;
+	}
+	else
 #endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
-
-  {
-    /* Matrix transpose by exchanging the rows with columns */
-    /* Row loop */
-    rowCnt = row >> 1;
-    while (rowCnt > 0U)
-    {
-      float64_t *row0,*row1;
-      float64x2x4_t raV;
-
-      blkCnt = nColumns >> 2;
-
-      /* The pointer px is set to starting address of the column being processed */
-      px = pOut + i;
-
-      /* Compute 4 outputs at a time.
-       ** a second loop below computes the remaining 1 to 3 samples. */
-      while (blkCnt > 0U)        /* Column loop */
-      {
-        row0 = pIn;
-        row1 = pIn+nColumns;
-        pIn+=4;
-        raV = vld4q_lane_f64(row0, raV, 0);
-        raV = vld4q_lane_f64(row1, raV, 1);
-        
-        vst1q_f64(px,raV.val[0]);
-        px += nRows;
-
-        vst1q_f64(px,raV.val[1]);
-        px += nRows;
-
-        vst1q_f64(px,raV.val[2]);
-        px += nRows;
-
-        vst1q_f64(px,raV.val[3]);
-        px += nRows;
-
-        /* Decrement the column loop counter */
-        blkCnt--;
-      }
-
-      /* Perform matrix transpose for last 3 samples here. */
-      blkCnt = nColumns % 0x4U;
-
-      while (blkCnt > 0U)
-      {
-        /* Read and store the input element in the destination */
-        *px++ = *pIn;
-        *px++ = *(pIn + 1 * nColumns);
-        
-        px += (nRows - 2);
-        pIn++;
-
-        /* Decrement the column loop counter */
-        blkCnt--;
-      }
-
-      i += 2;
-      pIn += 1 * nColumns;
-
-      /* Decrement the row loop counter */
-      rowCnt--;
-
-    }         /* Row loop end  */
-
-    rowCnt = row & 1;
-    while (rowCnt > 0U)
-    {
-      blkCnt = nColumns ;
-      /* The pointer px is set to starting address of the column being processed */
-      px = pOut + i;
-
-      while (blkCnt > 0U)
-      {
-        /* Read and store the input element in the destination */
-        *px = *pIn++;
-
-        /* Update the pointer px to point to the next row of the transposed matrix */
-        px += nRows;
-
-        /* Decrement the column loop counter */
-        blkCnt--;
-      }
-      i++;
-      rowCnt -- ;
-    }
-
-    /* Set status as ARM_MATH_SUCCESS */
-    status = ARM_MATH_SUCCESS;
-  }
-
-  /* Return to application */
-  return (status);
+		
+	{
+		/* Matrix transpose by exchanging the rows with columns */
+		/* Row loop */
+		rowCnt = row >> 1;
+		while (rowCnt > 0U)
+		{
+			float64_t *row0,*row1;
+			float64x2x4_t raV;
+			
+			blkCnt = nColumns >> 2;
+			
+			/* The pointer px is set to starting address of the column being processed */
+			px = pOut + i;
+			
+			/* Compute 4 outputs at a time.
+			 ** a second loop below computes the remaining 1 to 3 samples. */
+			while (blkCnt > 0U)        /* Column loop */
+			{
+				row0 = pIn;
+				row1 = pIn+nColumns;
+				pIn+=4;
+				raV = vld4q_lane_f64(row0, raV, 0);
+				raV = vld4q_lane_f64(row1, raV, 1);
+				
+				vst1q_f64(px,raV.val[0]);
+				px += nRows;
+				
+				vst1q_f64(px,raV.val[1]);
+				px += nRows;
+				
+				vst1q_f64(px,raV.val[2]);
+				px += nRows;
+				
+				vst1q_f64(px,raV.val[3]);
+				px += nRows;
+				
+				/* Decrement the column loop counter */
+				blkCnt--;
+			}
+			
+			/* Perform matrix transpose for last 3 samples here. */
+			blkCnt = nColumns % 0x4U;
+			
+			while (blkCnt > 0U)
+			{
+				/* Read and store the input element in the destination */
+				*px++ = *pIn;
+				*px++ = *(pIn + 1 * nColumns);
+				
+				px += (nRows - 2);
+				pIn++;
+				
+				/* Decrement the column loop counter */
+				blkCnt--;
+			}
+			
+			i += 2;
+			pIn += 1 * nColumns;
+			
+			/* Decrement the row loop counter */
+			rowCnt--;
+			
+		}         /* Row loop end  */
+		
+		rowCnt = row & 1;
+		while (rowCnt > 0U)
+		{
+			blkCnt = nColumns ;
+			/* The pointer px is set to starting address of the column being processed */
+			px = pOut + i;
+			
+			while (blkCnt > 0U)
+			{
+				/* Read and store the input element in the destination */
+				*px = *pIn++;
+				
+				/* Update the pointer px to point to the next row of the transposed matrix */
+				px += nRows;
+				
+				/* Decrement the column loop counter */
+				blkCnt--;
+			}
+			i++;
+			rowCnt -- ;
+		}
+		
+		/* Set status as ARM_MATH_SUCCESS */
+		status = ARM_MATH_SUCCESS;
+	}
+	
+	/* Return to application */
+	return (status);
 }
 #else
 arm_status arm_mat_trans_f64(
-  const arm_matrix_instance_f64 * pSrc,
-        arm_matrix_instance_f64 * pDst)
+	const arm_matrix_instance_f64 * pSrc,
+	arm_matrix_instance_f64 * pDst)
 {
-  float64_t *pIn = pSrc->pData;                  /* input data matrix pointer */
-  float64_t *pOut = pDst->pData;                 /* output data matrix pointer */
-  float64_t *px;                                 /* Temporary output data matrix pointer */
-  uint16_t nRows = pSrc->numRows;                /* number of rows */
-  uint16_t nCols = pSrc->numCols;                /* number of columns */
-  uint64_t col, row = nRows, i = 0U;             /* Loop counters */
-  arm_status status;                             /* status of matrix transpose */
-
+	float64_t *pIn = pSrc->pData;                  /* input data matrix pointer */
+	float64_t *pOut = pDst->pData;                 /* output data matrix pointer */
+	float64_t *px;                                 /* Temporary output data matrix pointer */
+	uint16_t nRows = pSrc->numRows;                /* number of rows */
+	uint16_t nCols = pSrc->numCols;                /* number of columns */
+	uint64_t col, row = nRows, i = 0U;             /* Loop counters */
+	arm_status status;                             /* status of matrix transpose */
+	
 #ifdef ARM_MATH_MATRIX_CHECK
-
-  /* Check for matrix mismatch condition */
-  if ((pSrc->numRows != pDst->numCols) ||
-      (pSrc->numCols != pDst->numRows)   )
-  {
-    /* Set status as ARM_MATH_SIZE_MISMATCH */
-    status = ARM_MATH_SIZE_MISMATCH;
-  }
-  else
-
+	
+	/* Check for matrix mismatch condition */
+	if ((pSrc->numRows != pDst->numCols) ||
+		(pSrc->numCols != pDst->numRows)   )
+	{
+		/* Set status as ARM_MATH_SIZE_MISMATCH */
+		status = ARM_MATH_SIZE_MISMATCH;
+	}
+	else
+		
 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
-
-  {
-    /* Matrix transpose by exchanging the rows with columns */
-    /* row loop */
-    do
-    {
-      /* Pointer px is set to starting address of column being processed */
-      px = pOut + i;
-
+		
+	{
+		/* Matrix transpose by exchanging the rows with columns */
+		/* row loop */
+		do
+		{
+			/* Pointer px is set to starting address of column being processed */
+			px = pOut + i;
+			
 #if defined (ARM_MATH_LOOPUNROLL)
-
-      /* Loop unrolling: Compute 4 outputs at a time */
-      col = nCols >> 2U;
-
-      while (col > 0U)        /* column loop */
-      {
-        /* Read and store input element in destination */
-        *px = *pIn++;
-        /* Update pointer px to point to next row of transposed matrix */
-        px += nRows;
-
-        *px = *pIn++;
-        px += nRows;
-
-        *px = *pIn++;
-        px += nRows;
-
-        *px = *pIn++;
-        px += nRows;
-
-        /* Decrement column loop counter */
-        col--;
-      }
-
-      /* Loop unrolling: Compute remaining outputs */
-      col = nCols % 0x4U;
-
+			
+			/* Loop unrolling: Compute 4 outputs at a time */
+			col = nCols >> 2U;
+			
+			while (col > 0U)        /* column loop */
+			{
+				/* Read and store input element in destination */
+				*px = *pIn++;
+				/* Update pointer px to point to next row of transposed matrix */
+				px += nRows;
+				
+				*px = *pIn++;
+				px += nRows;
+				
+				*px = *pIn++;
+				px += nRows;
+				
+				*px = *pIn++;
+				px += nRows;
+				
+				/* Decrement column loop counter */
+				col--;
+			}
+			
+			/* Loop unrolling: Compute remaining outputs */
+			col = nCols % 0x4U;
+			
 #else
-
-      /* Initialize col with number of samples */
-      col = nCols;
-
+			
+			/* Initialize col with number of samples */
+			col = nCols;
+			
 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
-      while (col > 0U)
-      {
-        /* Read and store input element in destination */
-        *px = *pIn++;
-
-        /* Update pointer px to point to next row of transposed matrix */
-        px += nRows;
-
-        /* Decrement column loop counter */
-        col--;
-      }
-
-      i++;
-
-      /* Decrement row loop counter */
-      row--;
-
-    } while (row > 0U);          /* row loop end */
-
-    /* Set status as ARM_MATH_SUCCESS */
-    status = ARM_MATH_SUCCESS;
-  }
-
-  /* Return to application */
-  return (status);
+			
+			while (col > 0U)
+			{
+				/* Read and store input element in destination */
+				*px = *pIn++;
+				
+				/* Update pointer px to point to next row of transposed matrix */
+				px += nRows;
+				
+				/* Decrement column loop counter */
+				col--;
+			}
+			
+			i++;
+			
+			/* Decrement row loop counter */
+			row--;
+			
+		} while (row > 0U);          /* row loop end */
+		
+		/* Set status as ARM_MATH_SUCCESS */
+		status = ARM_MATH_SUCCESS;
+	}
+	
+	/* Return to application */
+	return (status);
 }
 #endif
 /**
--- a/Source/StatisticsFunctions/arm_absmax_no_idx_f64.c
+++ b/Source/StatisticsFunctions/arm_absmax_no_idx_f64.c
@ -49,9 +49,9 @@

 #if defined(ARM_MATH_NEON)
 void arm_absmax_no_idx_f64(
-                           const float64_t * pSrc,
-                           uint32_t blockSize,
-                           float64_t * pResult)
+	const float64_t * pSrc,
+	uint32_t blockSize,
+	float64_t * pResult)
 {
    float64_t maxVal , in;                         /* Temporary variables to store the output value. */
    uint32_t blkCnt;                     /* Loop counter */
@ -112,9 +112,9 @@ void arm_absmax_no_idx_f64(
 }
 #else
 void arm_absmax_no_idx_f64(
-                           const float64_t * pSrc,
-                           uint32_t blockSize,
-                           float64_t * pResult)
+	const float64_t * pSrc,
+	uint32_t blockSize,
+	float64_t * pResult)
 {
    float64_t maxVal, out;                         /* Temporary variables to store the output value. */
    uint32_t blkCnt;                     /* Loop counter */
--- a/Source/StatisticsFunctions/arm_absmin_no_idx_f64.c
+++ b/Source/StatisticsFunctions/arm_absmin_no_idx_f64.c
@ -47,101 +47,101 @@

 #if defined(ARM_MATH_NEON)
 void arm_absmin_no_idx_f64(
-                           const float64_t * pSrc,
-                           uint32_t blockSize,
-                           float64_t * pResult)
+	const float64_t * pSrc,
+	uint32_t blockSize,
+	float64_t * pResult)
 {
-    float64_t minVal , in;                         /* Temporary variables to store the output value. */
-    uint32_t blkCnt;                     /* Loop counter */
-    
-    float64x2_t minV;
-    float64x2_t pSrcV ;
-    pSrcV = vld1q_f64(pSrc);
-    pSrc += 2 ;
-    minV = vabsq_f64(pSrcV);
-
-    
-    
-    
-    /* Load first input value that act as reference value for comparision */
-
-    
-    /* Initialize blkCnt with number of samples */
-    blkCnt = (blockSize - 2U) >> 1U;
-    
-    while (blkCnt > 0U)
-    {
-        /* Initialize maxVal to the next consecutive values one by one */
-        pSrcV = vld1q_f64(pSrc);
-        minV = vminq_f64(minV, vabsq_f64(pSrcV));
-        
-        pSrc += 2 ;
-        
-        /* Decrement loop counter */
-        blkCnt--;
-    }
-    minVal =vgetq_lane_f64(minV, 0);
-    if(minVal > vgetq_lane_f64(minV, 1))
-    {
-        minVal = vgetq_lane_f64(minV, 1);
-    }
-    blkCnt = (blockSize - 2U) & 1;
-    
-    while (blkCnt > 0U)
-    {
-        /* Initialize maxVal to the next consecutive values one by one */
-        in = fabs(*pSrc++);
-        
-        /* compare for the maximum value */
-        if (minVal > in)
-        {
-            /* Update the maximum value and it's index */
-            minVal = in;
-        }
-        
-        /* Decrement loop counter */
-        blkCnt--;
-    }
-    *pResult = minVal;
-    
-    
-    /* Store the maximum value and it's index into destination pointers */
-
+	float64_t minVal , in;                         /* Temporary variables to store the output value. */
+	uint32_t blkCnt;                     /* Loop counter */
+	
+	float64x2_t minV;
+	float64x2_t pSrcV ;
+	pSrcV = vld1q_f64(pSrc);
+	pSrc += 2 ;
+	minV = vabsq_f64(pSrcV);
+	
+	
+	
+	
+	/* Load first input value that act as reference value for comparision */
+	
+	
+	/* Initialize blkCnt with number of samples */
+	blkCnt = (blockSize - 2U) >> 1U;
+	
+	while (blkCnt > 0U)
+	{
+		/* Initialize maxVal to the next consecutive values one by one */
+		pSrcV = vld1q_f64(pSrc);
+		minV = vminq_f64(minV, vabsq_f64(pSrcV));
+		
+		pSrc += 2 ;
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	minVal =vgetq_lane_f64(minV, 0);
+	if(minVal > vgetq_lane_f64(minV, 1))
+	{
+		minVal = vgetq_lane_f64(minV, 1);
+	}
+	blkCnt = (blockSize - 2U) & 1;
+	
+	while (blkCnt > 0U)
+	{
+		/* Initialize maxVal to the next consecutive values one by one */
+		in = fabs(*pSrc++);
+		
+		/* compare for the maximum value */
+		if (minVal > in)
+		{
+			/* Update the maximum value and it's index */
+			minVal = in;
+		}
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	*pResult = minVal;
+	
+	
+	/* Store the maximum value and it's index into destination pointers */
+	
 }
 #else
 void arm_absmin_no_idx_f64(
-  const float64_t * pSrc,
-        uint32_t blockSize,
-        float64_t * pResult)
+	const float64_t * pSrc,
+	uint32_t blockSize,
+	float64_t * pResult)
 {
-       float64_t minVal, out;                         /* Temporary variables to store the output value. */
-       uint32_t blkCnt;                     /* Loop counter */
-
-
-  /* Load first input value that act as reference value for comparision */
-  out = fabs(*pSrc++);
-
-  /* Initialize blkCnt with number of samples */
-  blkCnt = (blockSize - 1U);
-
-  while (blkCnt > 0U)
-  {
-    /* Initialize minVal to the next consecutive values one by one */
-    minVal = fabs(*pSrc++);
-
-    /* compare for the minimum value */
-    if (out > minVal)
-    {
-      /* Update the minimum value and it's index */
-      out = minVal;
-    }
-
-    /* Decrement loop counter */
-    blkCnt--;
-  }
-
-  /* Store the minimum value and it's index into destination pointers */
-  *pResult = out;
+	float64_t minVal, out;                         /* Temporary variables to store the output value. */
+	uint32_t blkCnt;                     /* Loop counter */
+	
+	
+	/* Load first input value that act as reference value for comparision */
+	out = fabs(*pSrc++);
+	
+	/* Initialize blkCnt with number of samples */
+	blkCnt = (blockSize - 1U);
+	
+	while (blkCnt > 0U)
+	{
+		/* Initialize minVal to the next consecutive values one by one */
+		minVal = fabs(*pSrc++);
+		
+		/* compare for the minimum value */
+		if (out > minVal)
+		{
+			/* Update the minimum value and it's index */
+			out = minVal;
+		}
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	/* Store the minimum value and it's index into destination pointers */
+	*pResult = out;
 }
 #endif
 /**
--- a/Source/StatisticsFunctions/arm_entropy_f64.c
+++ b/Source/StatisticsFunctions/arm_entropy_f64.c
@ -49,43 +49,43 @@

 float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize)
 {
-    const float64_t *pIn;
-    uint32_t blkCnt;
-    float64_t accum, p;
- 
-    pIn = pSrcA;
-
-
-    accum = 0.0;
+	const float64_t *pIn;
+	uint32_t blkCnt;
+	float64_t accum, p;
+	
+	pIn = pSrcA;
+	
+	
+	accum = 0.0;
 #if defined(ARM_MATH_NEON)
-  float64x2_t sumV ,pInV ;
-  sumV = vdupq_n_f64(0.0f);
-  blkCnt = blockSize >> 1U ;
-  
-  while(blkCnt > 0){
-    pInV = vld1q_f64(pIn);
-    sumV = vmlaq_f64(sumV, pInV,vlogq_f64(pInV) );
-    pIn += 2 ;
-    blkCnt--;
-    
-  }
-  accum = vaddvq_f64(sumV);
-  blkCnt = blockSize & 1 ;
+	float64x2_t sumV ,pInV ;
+	sumV = vdupq_n_f64(0.0f);
+	blkCnt = blockSize >> 1U ;
+	
+	while(blkCnt > 0){
+		pInV = vld1q_f64(pIn);
+		sumV = vmlaq_f64(sumV, pInV,vlogq_f64(pInV) );
+		pIn += 2 ;
+		blkCnt--;
+		
+	}
+	accum = vaddvq_f64(sumV);
+	blkCnt = blockSize & 1 ;
 #else
-  blkCnt = blockSize;
+	blkCnt = blockSize;
 #endif
-
-    while(blkCnt > 0)
-    {
-        p = *pIn++;
-
-        accum += p * log(p);
-       
-        blkCnt--;
-    
-    }
-
-    return(-accum);
+	
+	while(blkCnt > 0)
+	{
+		p = *pIn++;
+		
+		accum += p * log(p);
+		
+		blkCnt--;
+		
+	}
+	
+	return(-accum);
 }

 /**
--- a/Source/StatisticsFunctions/arm_kullback_leibler_f64.c
+++ b/Source/StatisticsFunctions/arm_kullback_leibler_f64.c
@ -50,53 +50,53 @@

 float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,const float64_t * pSrcB,uint32_t blockSize)
 {
-    const float64_t *pInA, *pInB;
-    uint32_t blkCnt;
-    float64_t accum, pA,pB;
-
-    float64x2_t accumV;
-    float64x2_t tmpVA, tmpVB,tmpV;
- 
-    pInA = pSrcA;
-    pInB = pSrcB;
-
-    accum = 0.0f;
-    accumV = vdupq_n_f64(0.0f);
-
-    blkCnt = blockSize >> 1;
-    while(blkCnt > 0)
-    {
-      tmpVA = vld1q_f64(pInA);
-      pInA += 2;
-
-      tmpVB = vld1q_f64(pInB);
-      pInB += 2;
-
-      tmpV = vinvq_f64(tmpVA);
-      tmpVB = vmulq_f64(tmpVB, tmpV);
-      tmpVB = vlogq_f64(tmpVB);
-
-      accumV = vmlaq_f64(accumV, tmpVA, tmpVB);
-       
-      blkCnt--;
-    
-    }
-
-
-  accum = vaddvq_f64(accumV);
-
-    blkCnt = blockSize & 1;
-    while(blkCnt > 0)
-    {
-       pA = *pInA++;
-       pB = *pInB++;
-       accum += pA * logf(pB/pA);
-       
-       blkCnt--;
-    
-    }
-
-    return(-accum);
+	const float64_t *pInA, *pInB;
+	uint32_t blkCnt;
+	float64_t accum, pA,pB;
+	
+	float64x2_t accumV;
+	float64x2_t tmpVA, tmpVB,tmpV;
+	
+	pInA = pSrcA;
+	pInB = pSrcB;
+	
+	accum = 0.0f;
+	accumV = vdupq_n_f64(0.0f);
+	
+	blkCnt = blockSize >> 1;
+	while(blkCnt > 0)
+	{
+		tmpVA = vld1q_f64(pInA);
+		pInA += 2;
+		
+		tmpVB = vld1q_f64(pInB);
+		pInB += 2;
+		
+		tmpV = vinvq_f64(tmpVA);
+		tmpVB = vmulq_f64(tmpVB, tmpV);
+		tmpVB = vlogq_f64(tmpVB);
+		
+		accumV = vmlaq_f64(accumV, tmpVA, tmpVB);
+		
+		blkCnt--;
+		
+	}
+	
+	
+	accum = vaddvq_f64(accumV);
+	
+	blkCnt = blockSize & 1;
+	while(blkCnt > 0)
+	{
+		pA = *pInA++;
+		pB = *pInB++;
+		accum += pA * logf(pB/pA);
+		
+		blkCnt--;
+		
+	}
+	
+	return(-accum);
 }
 #else

--- a/Source/StatisticsFunctions/arm_max_no_idx_f64.c
+++ b/Source/StatisticsFunctions/arm_max_no_idx_f64.c
@ -47,9 +47,9 @@
 */
 #if defined(ARM_MATH_NEON)
 void arm_max_no_idx_f64(
-                           const float64_t * pSrc,
-                           uint32_t blockSize,
-                           float64_t * pResult)
+	const float64_t * pSrc,
+	uint32_t blockSize,
+	float64_t * pResult)
 {
    float64_t maxVal , in;                         /* Temporary variables to store the output value. */
    uint32_t blkCnt;                     /* Loop counter */
@ -110,9 +110,9 @@ void arm_max_no_idx_f64(
 }
 #else
 void arm_max_no_idx_f64(
-                        const float64_t *pSrc,
-                        uint32_t   blockSize,
-                        float64_t *pResult)
+	const float64_t *pSrc,
+	uint32_t   blockSize,
+	float64_t *pResult)
 {
    float64_t   maxValue = F64_MIN;
    float64_t   newVal;
--- a/Source/StatisticsFunctions/arm_mean_f64.c
+++ b/Source/StatisticsFunctions/arm_mean_f64.c
@ -49,68 +49,68 @@
 #if defined(ARM_MATH_NEON)

 void arm_mean_f64(
-                  const float64_t * pSrc,
-                  uint32_t blockSize,
-                  float64_t * pResult)
+	const float64_t * pSrc,
+	uint32_t blockSize,
+	float64_t * pResult)
 {
-  uint32_t blkCnt;                               /* Loop counter */
-  float64x2_t vSum = vdupq_n_f64(0.0f);
-  float64_t sum = 0.;                            /* Temporary result storage */
-  float64x2_t afterLoad ;
-  /* Initialize blkCnt with number of samples */
-  blkCnt = blockSize >> 1U;
-  
-  
-  while (blkCnt > 0U)
-  {
-    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-    
-    afterLoad = vld1q_f64(pSrc);
-    vSum = vaddq_f64(vSum, afterLoad);
-    pSrc += 2;
-    /* Decrement loop counter */
-    blkCnt--;
-    
-
-  }
-  sum = vaddvq_f64(vSum);
-  
-  blkCnt = blockSize & 1;
-  
-  while (blkCnt > 0U)
-  {
-    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-    sum += *pSrc++;
-    
-    /* Decrement loop counter */
-    blkCnt--;
-  }
-  *pResult = (sum/blockSize);
+	uint32_t blkCnt;                               /* Loop counter */
+	float64x2_t vSum = vdupq_n_f64(0.0f);
+	float64_t sum = 0.;                            /* Temporary result storage */
+	float64x2_t afterLoad ;
+	/* Initialize blkCnt with number of samples */
+	blkCnt = blockSize >> 1U;
+	
+	
+	while (blkCnt > 0U)
+	{
+		/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+		
+		afterLoad = vld1q_f64(pSrc);
+		vSum = vaddq_f64(vSum, afterLoad);
+		pSrc += 2;
+		/* Decrement loop counter */
+		blkCnt--;
+		
+		
+	}
+	sum = vaddvq_f64(vSum);
+	
+	blkCnt = blockSize & 1;
+	
+	while (blkCnt > 0U)
+	{
+		/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+		sum += *pSrc++;
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	*pResult = (sum/blockSize);
 }
 #else
 void arm_mean_f64(
-                  const float64_t * pSrc,
-                  uint32_t blockSize,
-                  float64_t * pResult)
+	const float64_t * pSrc,
+	uint32_t blockSize,
+	float64_t * pResult)
 {
-  uint32_t blkCnt;                               /* Loop counter */
-  float64_t sum = 0.;                            /* Temporary result storage */
-  
-  /* Initialize blkCnt with number of samples */
-  blkCnt = blockSize;
-  
-  while (blkCnt > 0U)
-  {
-    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-    sum += *pSrc++;
-    
-    /* Decrement loop counter */
-    blkCnt--;
-  }
-  
-  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
-  /* Store result to destination */
-  *pResult = (sum / blockSize);
+	uint32_t blkCnt;                               /* Loop counter */
+	float64_t sum = 0.;                            /* Temporary result storage */
+	
+	/* Initialize blkCnt with number of samples */
+	blkCnt = blockSize;
+	
+	while (blkCnt > 0U)
+	{
+		/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+		sum += *pSrc++;
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
+	/* Store result to destination */
+	*pResult = (sum / blockSize);
 }
 #endif
 /**
--- a/Source/StatisticsFunctions/arm_min_no_idx_f64.c
+++ b/Source/StatisticsFunctions/arm_min_no_idx_f64.c
@ -46,51 +46,51 @@
  @return        none
 */
 void arm_min_no_idx_f64(
-    const float64_t *pSrc,
-    uint32_t   blockSize,
-    float64_t *pResult)
+	const float64_t *pSrc,
+	uint32_t   blockSize,
+	float64_t *pResult)
 {
-   float64_t   minValue = F64_MAX;
-   float64_t   newVal;
-  uint32_t blkCnt ;
+	float64_t   minValue = F64_MAX;
+	float64_t   newVal;
+	uint32_t blkCnt ;
 #if defined(ARM_MATH_NEON)
-  float64x2_t minValueV , newValV ;
-  minValueV = vdupq_n_f64(F64_MAX);
-  blkCnt = blockSize >> 1U;
-  while(blkCnt > 0)
-  {
-    newValV = vld1q_f64(pSrc);
-    minValueV = vminq_f64(minValueV, newValV);
-    pSrc += 2 ;
-    blkCnt--;
-    
-  }
-  minValue =vgetq_lane_f64(minValueV, 0);
-  if(minValue > vgetq_lane_f64(minValueV, 1))
-  {
-      minValue = vgetq_lane_f64(minValueV, 1);
-  }
-  
-  blkCnt = blockSize & 1 ;
+	float64x2_t minValueV , newValV ;
+	minValueV = vdupq_n_f64(F64_MAX);
+	blkCnt = blockSize >> 1U;
+	while(blkCnt > 0)
+	{
+		newValV = vld1q_f64(pSrc);
+		minValueV = vminq_f64(minValueV, newValV);
+		pSrc += 2 ;
+		blkCnt--;
+		
+	}
+	minValue =vgetq_lane_f64(minValueV, 0);
+	if(minValue > vgetq_lane_f64(minValueV, 1))
+	{
+		minValue = vgetq_lane_f64(minValueV, 1);
+	}
+	
+	blkCnt = blockSize & 1 ;
 #else
-  blkCnt = blockSize;
+	blkCnt = blockSize;
 #endif
-
-   while (blkCnt > 0U)
-   {
-       newVal = *pSrc++;
-   
-       /* compare for the minimum value */
-       if (minValue > newVal)
-       {
-           /* Update the minimum value and it's index */
-           minValue = newVal;
-       }
-   
-       blkCnt --;
-   }
-    
-   *pResult = minValue;
+	
+	while (blkCnt > 0U)
+	{
+		newVal = *pSrc++;
+		
+		/* compare for the minimum value */
+		if (minValue > newVal)
+		{
+			/* Update the minimum value and it's index */
+			minValue = newVal;
+		}
+		
+		blkCnt --;
+	}
+	
+	*pResult = minValue;
 }

 /**
--- a/Source/StatisticsFunctions/arm_mse_f64.c
+++ b/Source/StatisticsFunctions/arm_mse_f64.c
@ -47,84 +47,84 @@
 */

 void arm_mse_f64(
-                 const float64_t * pSrcA,
-                 const float64_t * pSrcB,
-                 uint32_t    blockSize,
-                 float64_t * result)
+	const float64_t * pSrcA,
+	const float64_t * pSrcB,
+	uint32_t 	blockSize,
+	float64_t * result)

 {
-    
-    uint32_t blkCnt;                               /* Loop counter */
-    float64_t inA, inB;
-    float64_t sum = 0.0;
+	
+	uint32_t blkCnt;                               /* Loop counter */
+	float64_t inA, inB;
+	float64_t sum = 0.0;
 #if defined (ARM_MATH_NEON)
-    
-    float64x2_t inAV , inBV , subV, sumV;
-    sumV = vdupq_n_f64(0.0f);
-    
-    blkCnt = blockSize >> 1U ;
-    
-    while (blkCnt > 0U)
-    {
-        inAV = vld1q_f64(pSrcA);
-        pSrcA+=2;
-        inBV = vld1q_f64(pSrcB);
-        pSrcB+=2;
-        subV = vsubq_f64(inAV, inBV);
-        sumV = vmlaq_f64(sumV, subV, subV);
-        
-        blkCnt--;
-        
-    }
-    sum = vaddvq_f64(sumV);
-    blkCnt = (blockSize) & 1;
-    
+	
+	float64x2_t inAV , inBV , subV, sumV;
+	sumV = vdupq_n_f64(0.0f);
+	
+	blkCnt = blockSize >> 1U ;
+	
+	while (blkCnt > 0U)
+	{
+		inAV = vld1q_f64(pSrcA);
+		pSrcA+=2;
+		inBV = vld1q_f64(pSrcB);
+		pSrcB+=2;
+		subV = vsubq_f64(inAV, inBV);
+		sumV = vmlaq_f64(sumV, subV, subV);
+		
+		blkCnt--;
+		
+	}
+	sum = vaddvq_f64(sumV);
+	blkCnt = (blockSize) & 1;
+	
 #else
-                            /* Temporary return variable */
+	/* Temporary return variable */
 #if defined (ARM_MATH_LOOPUNROLL)
-    blkCnt = (blockSize) >> 1;
-    
+	blkCnt = (blockSize) >> 1;
+	
 #pragma clang loop vectorize(enable)
-    while (blkCnt > 0U)
-    {
-        
-        
-        inA = *pSrcA++;
-        inB = *pSrcB++;
-        inA = inA - inB;
-        sum += inA * inA;
-        
-        inA = *pSrcA++;
-        inB = *pSrcB++;
-        inA = inA - inB;
-        sum += inA * inA;
-        
-        /* Decrement loop counter */
-        blkCnt--;
-    }
-    
-    
-    /* Loop unrolling: Compute remaining outputs */
-    blkCnt = (blockSize) & 1;
+	while (blkCnt > 0U)
+	{
+		
+		
+		inA = *pSrcA++;
+		inB = *pSrcB++;
+		inA = inA - inB;
+		sum += inA * inA;
+		
+		inA = *pSrcA++;
+		inB = *pSrcB++;
+		inA = inA - inB;
+		sum += inA * inA;
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	
+	/* Loop unrolling: Compute remaining outputs */
+	blkCnt = (blockSize) & 1;
 #else
-    /* Initialize blkCnt with number of samples */
-    blkCnt = blockSize;
+	/* Initialize blkCnt with number of samples */
+	blkCnt = blockSize;
 #endif
 #endif
-//#pragma clang loop vectorize(enable) unroll(disable)
-    while (blkCnt > 0U)
-    {
-        inA = *pSrcA++;
-        inB = *pSrcB++;
-        inA = inA - inB;
-        sum += inA * inA;
-        
-        /* Decrement loop counter */
-        blkCnt--;
-    }
-    
-    /* Store result in destination buffer */
-    *result = sum / blockSize;
+	//#pragma clang loop vectorize(enable) unroll(disable)
+	while (blkCnt > 0U)
+	{
+		inA = *pSrcA++;
+		inB = *pSrcB++;
+		inA = inA - inB;
+		sum += inA * inA;
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	/* Store result in destination buffer */
+	*result = sum / blockSize;
 }


--- a/Source/StatisticsFunctions/arm_power_f64.c
+++ b/Source/StatisticsFunctions/arm_power_f64.c
@ -46,81 +46,81 @@
 */
 #if defined(ARM_MATH_NEON)
 void arm_power_f64(
-                   const float64_t * pSrc,
-                   uint32_t blockSize,
-                   float64_t * pResult)
+	const float64_t * pSrc,
+	uint32_t blockSize,
+	float64_t * pResult)
 {
-    uint32_t blkCnt;                               /* Loop counter */
-    float64_t sum = 0.;                          /* Temporary result storage */
-    float64x2_t sumV ;                                 /* Temporary variable to store input value */
-    sumV = vdupq_n_f64(0.0f);
-    float64x2_t pSrcV;
-    
-    /* Initialize blkCnt with number of samples */
-    blkCnt = blockSize >> 1U;
-    
-    while (blkCnt > 0U)
-    {
-        /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-        
-        /* Compute Power and store result in a temporary variable, sum. */
-        pSrcV = vld1q_f64(pSrc);
-        sumV = vmlaq_f64(sumV, pSrcV, pSrcV);
-        pSrc+= 2 ;
-        
-        
-        
-        /* Decrement loop counter */
-        blkCnt--;
-    }
-    sum = vaddvq_f64(sumV);
-    
-    
-    float64_t in;
-    blkCnt = blockSize & 1;
-    
-    while (blkCnt > 0U)
-    {
-        /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-        
-        /* Compute Power and store result in a temporary variable, sum. */
-        in = *pSrc++;
-        sum += in * in;
-        
-        /* Decrement loop counter */
-        blkCnt--;
-    }
-    
-    /* Store result to destination */
-    *pResult = sum;
+	uint32_t blkCnt;                               /* Loop counter */
+	float64_t sum = 0.;                          /* Temporary result storage */
+	float64x2_t sumV ;                                 /* Temporary variable to store input value */
+	sumV = vdupq_n_f64(0.0f);
+	float64x2_t pSrcV;
+	
+	/* Initialize blkCnt with number of samples */
+	blkCnt = blockSize >> 1U;
+	
+	while (blkCnt > 0U)
+	{
+		/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+		
+		/* Compute Power and store result in a temporary variable, sum. */
+		pSrcV = vld1q_f64(pSrc);
+		sumV = vmlaq_f64(sumV, pSrcV, pSrcV);
+		pSrc+= 2 ;
+		
+		
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	sum = vaddvq_f64(sumV);
+	
+	
+	float64_t in;
+	blkCnt = blockSize & 1;
+	
+	while (blkCnt > 0U)
+	{
+		/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+		
+		/* Compute Power and store result in a temporary variable, sum. */
+		in = *pSrc++;
+		sum += in * in;
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	/* Store result to destination */
+	*pResult = sum;
 }
 #else
 void arm_power_f64(
-                   const float64_t * pSrc,
-                   uint32_t blockSize,
-                   float64_t * pResult)
+	const float64_t * pSrc,
+	uint32_t blockSize,
+	float64_t * pResult)
 {
-    uint32_t blkCnt;                               /* Loop counter */
-    float64_t sum = 0.;                          /* Temporary result storage */
-    float64_t in;                                  /* Temporary variable to store input value */
-    
-    /* Initialize blkCnt with number of samples */
-    blkCnt = blockSize;
-    
-    while (blkCnt > 0U)
-    {
-        /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-        
-        /* Compute Power and store result in a temporary variable, sum. */
-        in = *pSrc++;
-        sum += in * in;
-        
-        /* Decrement loop counter */
-        blkCnt--;
-    }
-    
-    /* Store result to destination */
-    *pResult = sum;
+	uint32_t blkCnt;                               /* Loop counter */
+	float64_t sum = 0.;                          /* Temporary result storage */
+	float64_t in;                                  /* Temporary variable to store input value */
+	
+	/* Initialize blkCnt with number of samples */
+	blkCnt = blockSize;
+	
+	while (blkCnt > 0U)
+	{
+		/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+		
+		/* Compute Power and store result in a temporary variable, sum. */
+		in = *pSrc++;
+		sum += in * in;
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	/* Store result to destination */
+	*pResult = sum;
 }
 #endif
 /**
--- a/Source/StatisticsFunctions/arm_var_f64.c
+++ b/Source/StatisticsFunctions/arm_var_f64.c
@ -45,56 +45,58 @@
  @return        none
 */
 void arm_var_f64(
-  const float64_t * pSrc,
-        uint32_t blockSize,
-        float64_t * pResult)
+	const float64_t * pSrc,
+	uint32_t blockSize,
+	float64_t * pResult)
 {
-        uint32_t blkCnt;                               /* Loop counter */
-        float64_t sum = 0.;                          /* Temporary result storage */
-        float64_t fSum = 0.;
-        float64_t fMean, fValue;
-  const float64_t * pInput = pSrc;
-
-  if (blockSize <= 1U)
-  {
-    *pResult = 0;
-    return;
-  }
-
-  /* Initialize blkCnt with number of samples */
-  blkCnt = blockSize;
-
-  while (blkCnt > 0U)
-  {
-    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-
-    sum += *pInput++;
-
-    /* Decrement loop counter */
-    blkCnt--;
-  }
-
-  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
-  fMean = sum / (float64_t) blockSize;
-
-  pInput = pSrc;
-
-  /* Initialize blkCnt with number of samples */
-  blkCnt = blockSize;
-
-  while (blkCnt > 0U)
-  {
-    fValue = *pInput++ - fMean;
-    fSum += fValue * fValue;
-
-    /* Decrement loop counter */
-    blkCnt--;
-  }
-
-  /* Variance */
-  *pResult = fSum / (float64_t)(blockSize - 1.);
+	
+	uint32_t blkCnt;                               /* Loop counter */
+	float64_t sum = 0.;                          /* Temporary result storage */
+	float64_t fSum = 0.;
+	float64_t fMean, fValue;
+	const float64_t * pInput = pSrc;
+	
+	if (blockSize <= 1U)
+	{
+		*pResult = 0;
+		return;
+	}
+	arm_mean_f64(pInput, blockSize, &fMean);
+#if defined(ARM_MATH_NEON)
+	float64x2_t fValueV ,fsumV , pInputV , fMeanV;
+	fsumV = vdupq_n_f64(0.0f);
+	fMeanV = vdupq_n_f64(fMean);
+	blkCnt = blockSize >> 1U;
+	
+	while(blkCnt > 0U)
+	{
+		pInputV = vld1q_f64(pInput);
+		fValueV = vsubq_f64(pInputV, fMeanV);
+		fsumV = vmlaq_f64(fsumV, fValueV, fValueV);
+		pInput += 2 ;
+		blkCnt--;
+	}
+	fSum = vaddvq_f64(fsumV);
+	
+	blkCnt = blockSize & 1 ;
+	
+#else
+	/* Initialize blkCnt with number of samples */
+	blkCnt = blockSize;
+#endif
+	while (blkCnt > 0U)
+	{
+		fValue = *pInput++ - fMean;
+		fSum += fValue * fValue;
+		
+		/* Decrement loop counter */
+		blkCnt--;
+	}
+	
+	/* Variance */
+	*pResult = fSum / (float64_t)(blockSize - 1.);
 }

 /**
-  @} end of variance group
+ @} end of variance group
 */