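Corrected arm_conv_partial_q15(), arm_conv_q15(), and arm_correlate_q15() for Cortex-M7-based cores: ARM_MATH_CM7 now selects the same code path as ARM_MATH_CM4/ARM_MATH_CM3 when UNALIGNED_SUPPORT_DISABLE is not defined.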

9 years ago · 3a1cfe0e0b
parent 31eba363dd
commit 3a1cfe0e0b
3 changed files with 329 additions and 329 deletions
--- a/Source/FilteringFunctions/arm_conv_partial_q15.c
+++ b/Source/FilteringFunctions/arm_conv_partial_q15.c
@ -1,13 +1,13 @@
 /* ----------------------------------------------------------------------
 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
 *
-* $Date:        19. March 2015
-* $Revision: 	V.1.4.5
+* $Date:        26. September 2016
+* $Revision:    V.1.4.5 a
 *
-* Project: 	    CMSIS DSP Library   
-* Title:		arm_conv_partial_q15.c   
+* Project:      CMSIS DSP Library
+* Title:        arm_conv_partial_q15.c
 *
-* Description:	Partial convolution of Q15 sequences.  
+* Description:  Partial convolution of Q15 sequences.
 *
 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
 *
@ -67,7 +67,6 @@
 *
 */

-
 arm_status arm_conv_partial_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
@ -78,7 +77,8 @@ arm_status arm_conv_partial_q15(
  uint32_t numPoints)
 {

-#if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)
+
+#if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)

  /* Run the below code for Cortex-M4 and Cortex-M3 */

@ -300,8 +300,8 @@ arm_status arm_conv_partial_q15(
    pSrc2 = pIn2 + (srcBLen - 1u);
    py = pSrc2;

-  /* count is the index by which the pointer pIn1 to be incremented */
-  count = 0u;
+    /* count is the index by which the pointer pIn1 to be incremented */
+    /* count is the index by which the pointer pIn1 is to be incremented */


  /* --------------------
@ -331,7 +331,7 @@ arm_status arm_conv_partial_q15(
      x0 = *__SIMD32(px);
      /* read x[1], x[2] samples */
      x1 = _SIMD32_OFFSET(px+1);
-	  px+= 2u;
+      px+= 2u;


      /* Apply loop unrolling and compute 4 MACs simultaneously. */
@ -377,7 +377,7 @@ arm_status arm_conv_partial_q15(

        /* Read x[5], x[6] */
        x1 = _SIMD32_OFFSET(px+3);
-		px += 4u;
+        px += 4u;

        /* acc2 +=  x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
        acc2 = __SMLALDX(x0, c0, acc2);
@ -411,7 +411,7 @@ arm_status arm_conv_partial_q15(

        /* Read x[7] */
        x3 = *__SIMD32(px);
-		px++;
+        px++;

        /* Perform the multiply-accumulates */
        acc0 = __SMLALD(x0, c0, acc0);
@ -430,7 +430,7 @@ arm_status arm_conv_partial_q15(

        /* Read x[9] */
        x2 = _SIMD32_OFFSET(px+1);
-		px += 2u;
+        px += 2u;

        /* Perform the multiply-accumulates */
        acc0 = __SMLALDX(x0, c0, acc0);
@ -456,7 +456,7 @@ arm_status arm_conv_partial_q15(
        acc2 = __SMLALDX(x3, c0, acc2);
        acc3 = __SMLALDX(x2, c0, acc3);

-		c0 = *(py-1);
+        c0 = *(py-1);

 #ifdef  ARM_MATH_BIG_ENDIAN

@ -468,7 +468,7 @@ arm_status arm_conv_partial_q15(

        /* Read x[10] */
        x3 =  _SIMD32_OFFSET(px+2);
-		px += 3u;
+        px += 3u;

        /* Perform the multiply-accumulates */
        acc0 = __SMLALDX(x1, c0, acc0);
@ -777,7 +777,7 @@ arm_status arm_conv_partial_q15(
  }
  return (status);

-#endif /* #if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)  */
+#endif /* #if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) */

 }

--- a/Source/FilteringFunctions/arm_conv_q15.c
+++ b/Source/FilteringFunctions/arm_conv_q15.c
@ -1,13 +1,13 @@
 /* ----------------------------------------------------------------------
 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
 *
-* $Date:        19. March 2015
-* $Revision: 	V.1.4.5
+* $Date:        26. September 2016
+* $Revision:    V.1.4.5 a
 *
-* Project: 	    CMSIS DSP Library   
-* Title:		arm_conv_q15.c   
+* Project:      CMSIS DSP Library
+* Title:        arm_conv_q15.c
 *
-* Description:	Convolution of Q15 sequences.     
+* Description:  Convolution of Q15 sequences.
 *
 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
 *
@ -84,7 +84,7 @@ void arm_conv_q15(
  q15_t * pDst)
 {

-#if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)
+#if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)

  /* Run the below code for Cortex-M4 and Cortex-M3 */

@ -306,7 +306,7 @@ void arm_conv_q15(
      x0 = *__SIMD32(px);
      /* read x[1], x[2] samples */
      x1 = _SIMD32_OFFSET(px+1);
-	  px+= 2u;
+      px+= 2u;


      /* Apply loop unrolling and compute 4 MACs simultaneously. */
@ -352,7 +352,7 @@ void arm_conv_q15(

        /* Read x[5], x[6] */
        x1 = _SIMD32_OFFSET(px+3);
-		px += 4u;
+        px += 4u;

        /* acc2 +=  x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
        acc2 = __SMLALDX(x0, c0, acc2);
@ -385,7 +385,7 @@ void arm_conv_q15(
 #endif /*      #ifdef  ARM_MATH_BIG_ENDIAN     */
        /* Read x[7] */
        x3 = *__SIMD32(px);
-		px++;
+        px++;

        /* Perform the multiply-accumulates */
        acc0 = __SMLALD(x0, c0, acc0);
@ -404,7 +404,7 @@ void arm_conv_q15(

        /* Read x[9] */
        x2 = _SIMD32_OFFSET(px+1);
-		px += 2u;
+        px += 2u;

        /* Perform the multiply-accumulates */
        acc0 = __SMLALDX(x0, c0, acc0);
@ -430,7 +430,7 @@ void arm_conv_q15(
        acc2 = __SMLALDX(x3, c0, acc2);
        acc3 = __SMLALDX(x2, c0, acc3);

-		c0 = *(py-1);
+        c0 = *(py-1);

 #ifdef  ARM_MATH_BIG_ENDIAN

@ -441,7 +441,7 @@ void arm_conv_q15(
 #endif /*      #ifdef  ARM_MATH_BIG_ENDIAN     */
        /* Read x[10] */
        x3 =  _SIMD32_OFFSET(px+2);
-		px += 3u;
+        px += 3u;

        /* Perform the multiply-accumulates */
        acc0 = __SMLALDX(x1, c0, acc0);
@ -725,7 +725,7 @@ void arm_conv_q15(
    pDst[i] = (q15_t) __SSAT((sum >> 15u), 16u);
  }

-#endif /*  #if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)*/
+#endif /* #if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) */

 }

--- a/Source/FilteringFunctions/arm_correlate_q15.c
+++ b/Source/FilteringFunctions/arm_correlate_q15.c
@ -1,13 +1,13 @@
 /* ----------------------------------------------------------------------
 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
 *
-* $Date:        19. March 2015
-* $Revision: 	V.1.4.5
+* $Date:        26. September 2016
+* $Revision:    V.1.4.5 a
 *
-* Project: 	    CMSIS DSP Library   
-* Title:		arm_correlate_q15.c   
+* Project:      CMSIS DSP Library
+* Title:        arm_correlate_q15.c
 *
-* Description:	Correlation of Q15 sequences. 
+* Description:  Correlation of Q15 sequences.
 *
 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
 *
@ -84,7 +84,7 @@ void arm_correlate_q15(
  q15_t * pDst)
 {

-#if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)
+#if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)

  /* Run the below code for Cortex-M4 and Cortex-M3 */

@ -289,7 +289,7 @@ void arm_correlate_q15(
      x0 = *__SIMD32(px);
      /* read x[1], x[2] samples */
      x1 = _SIMD32_OFFSET(px + 1);
-	  px += 2u;
+      px += 2u;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
      k = srcBLen >> 2u;
@ -335,7 +335,7 @@ void arm_correlate_q15(
        /* Read x[5], x[6] */
        x1 = _SIMD32_OFFSET(px + 3);

-		px += 4u;
+        px += 4u;

        /* acc2 +=  x[4] * y[2] + x[5] * y[3] */
        acc2 = __SMLALD(x0, c0, acc2);
@ -364,7 +364,7 @@ void arm_correlate_q15(
 #endif /*      #ifdef  ARM_MATH_BIG_ENDIAN     */
        /* Read x[7] */
        x3 = *__SIMD32(px);
-		px++;
+        px++;

        /* Perform the multiply-accumulates */
        acc0 = __SMLALD(x0, c0, acc0);
@ -383,7 +383,7 @@ void arm_correlate_q15(

        /* Read x[9] */
        x2 = _SIMD32_OFFSET(px + 1);
-		px += 2u;
+        px += 2u;

        /* Perform the multiply-accumulates */
        acc0 = __SMLALD(x0, c0, acc0);
@ -421,7 +421,7 @@ void arm_correlate_q15(
 #endif /*      #ifdef  ARM_MATH_BIG_ENDIAN     */
        /* Read x[10] */
        x3 = _SIMD32_OFFSET(px + 2);
-		px += 3u;
+        px += 3u;

        /* Perform the multiply-accumulates */
        acc0 = __SMLALDX(x1, c0, acc0);
@ -710,7 +710,7 @@ void arm_correlate_q15(
      *pDst++ = (q15_t) __SSAT((sum >> 15u), 16u);
  }

-#endif /*#if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) */
+#endif /* #if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) */

 }