Corrected arm_conv_partial_q15(), arm_conv_q15(), arm_correlate_q15() for Cortex-M7 based cores.

pull/19/head
Martin Günther 9 years ago
parent 31eba363dd
commit 3a1cfe0e0b

@ -1,13 +1,13 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
* $Date: 26. September 2016
* $Revision: V.1.4.5 a
*
* Project: CMSIS DSP Library
* Title: arm_conv_partial_q15.c
* Project: CMSIS DSP Library
* Title: arm_conv_partial_q15.c
*
* Description: Partial convolution of Q15 sequences.
* Description: Partial convolution of Q15 sequences.
*
* Target Processor: Cortex-M7/Cortex-M4/Cortex-M3/Cortex-M0
*
@ -67,7 +67,6 @@
*
*/
arm_status arm_conv_partial_q15(
q15_t * pSrcA,
uint32_t srcALen,
@ -78,7 +77,8 @@ arm_status arm_conv_partial_q15(
uint32_t numPoints)
{
#if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)
#if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)
/* Run the below code for Cortex-M7, Cortex-M4 and Cortex-M3 */
@ -300,8 +300,8 @@ arm_status arm_conv_partial_q15(
pSrc2 = pIn2 + (srcBLen - 1u);
py = pSrc2;
/* count is the index by which the pointer pIn1 to be incremented */
count = 0u;
/* count is the index by which the pointer pIn1 is to be incremented */
count = 0u;
/* --------------------
@ -331,7 +331,7 @@ arm_status arm_conv_partial_q15(
x0 = *__SIMD32(px);
/* read x[1], x[2] samples */
x1 = _SIMD32_OFFSET(px+1);
px+= 2u;
px+= 2u;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
@ -377,7 +377,7 @@ arm_status arm_conv_partial_q15(
/* Read x[5], x[6] */
x1 = _SIMD32_OFFSET(px+3);
px += 4u;
px += 4u;
/* acc2 += x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
acc2 = __SMLALDX(x0, c0, acc2);
@ -411,7 +411,7 @@ arm_status arm_conv_partial_q15(
/* Read x[7] */
x3 = *__SIMD32(px);
px++;
px++;
/* Perform the multiply-accumulates */
acc0 = __SMLALD(x0, c0, acc0);
@ -430,7 +430,7 @@ arm_status arm_conv_partial_q15(
/* Read x[9] */
x2 = _SIMD32_OFFSET(px+1);
px += 2u;
px += 2u;
/* Perform the multiply-accumulates */
acc0 = __SMLALDX(x0, c0, acc0);
@ -456,7 +456,7 @@ arm_status arm_conv_partial_q15(
acc2 = __SMLALDX(x3, c0, acc2);
acc3 = __SMLALDX(x2, c0, acc3);
c0 = *(py-1);
c0 = *(py-1);
#ifdef ARM_MATH_BIG_ENDIAN
@ -468,7 +468,7 @@ arm_status arm_conv_partial_q15(
/* Read x[10] */
x3 = _SIMD32_OFFSET(px+2);
px += 3u;
px += 3u;
/* Perform the multiply-accumulates */
acc0 = __SMLALDX(x1, c0, acc0);
@ -777,7 +777,7 @@ arm_status arm_conv_partial_q15(
}
return (status);
#endif /* #if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) */
#endif /* #if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) */
}

@ -1,13 +1,13 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
* $Date: 26. September 2016
* $Revision: V.1.4.5 a
*
* Project: CMSIS DSP Library
* Title: arm_conv_q15.c
* Project: CMSIS DSP Library
* Title: arm_conv_q15.c
*
* Description: Convolution of Q15 sequences.
* Description: Convolution of Q15 sequences.
*
* Target Processor: Cortex-M7/Cortex-M4/Cortex-M3/Cortex-M0
*
@ -84,7 +84,7 @@ void arm_conv_q15(
q15_t * pDst)
{
#if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)
#if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)
/* Run the below code for Cortex-M7, Cortex-M4 and Cortex-M3 */
@ -306,7 +306,7 @@ void arm_conv_q15(
x0 = *__SIMD32(px);
/* read x[1], x[2] samples */
x1 = _SIMD32_OFFSET(px+1);
px+= 2u;
px+= 2u;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
@ -352,7 +352,7 @@ void arm_conv_q15(
/* Read x[5], x[6] */
x1 = _SIMD32_OFFSET(px+3);
px += 4u;
px += 4u;
/* acc2 += x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
acc2 = __SMLALDX(x0, c0, acc2);
@ -385,7 +385,7 @@ void arm_conv_q15(
#endif /* #ifdef ARM_MATH_BIG_ENDIAN */
/* Read x[7] */
x3 = *__SIMD32(px);
px++;
px++;
/* Perform the multiply-accumulates */
acc0 = __SMLALD(x0, c0, acc0);
@ -404,7 +404,7 @@ void arm_conv_q15(
/* Read x[9] */
x2 = _SIMD32_OFFSET(px+1);
px += 2u;
px += 2u;
/* Perform the multiply-accumulates */
acc0 = __SMLALDX(x0, c0, acc0);
@ -430,7 +430,7 @@ void arm_conv_q15(
acc2 = __SMLALDX(x3, c0, acc2);
acc3 = __SMLALDX(x2, c0, acc3);
c0 = *(py-1);
c0 = *(py-1);
#ifdef ARM_MATH_BIG_ENDIAN
@ -441,7 +441,7 @@ void arm_conv_q15(
#endif /* #ifdef ARM_MATH_BIG_ENDIAN */
/* Read x[10] */
x3 = _SIMD32_OFFSET(px+2);
px += 3u;
px += 3u;
/* Perform the multiply-accumulates */
acc0 = __SMLALDX(x1, c0, acc0);
@ -725,7 +725,7 @@ void arm_conv_q15(
pDst[i] = (q15_t) __SSAT((sum >> 15u), 16u);
}
#endif /* #if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)*/
#endif /* #if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) */
}

@ -1,13 +1,13 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
* $Date: 26. September 2016
* $Revision: V.1.4.5 a
*
* Project: CMSIS DSP Library
* Title: arm_correlate_q15.c
* Project: CMSIS DSP Library
* Title: arm_correlate_q15.c
*
* Description: Correlation of Q15 sequences.
* Description: Correlation of Q15 sequences.
*
* Target Processor: Cortex-M7/Cortex-M4/Cortex-M3/Cortex-M0
*
@ -84,7 +84,7 @@ void arm_correlate_q15(
q15_t * pDst)
{
#if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)
#if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)
/* Run the below code for Cortex-M7, Cortex-M4 and Cortex-M3 */
@ -289,7 +289,7 @@ void arm_correlate_q15(
x0 = *__SIMD32(px);
/* read x[1], x[2] samples */
x1 = _SIMD32_OFFSET(px + 1);
px += 2u;
px += 2u;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
@ -335,7 +335,7 @@ void arm_correlate_q15(
/* Read x[5], x[6] */
x1 = _SIMD32_OFFSET(px + 3);
px += 4u;
px += 4u;
/* acc2 += x[4] * y[2] + x[5] * y[3] */
acc2 = __SMLALD(x0, c0, acc2);
@ -364,7 +364,7 @@ void arm_correlate_q15(
#endif /* #ifdef ARM_MATH_BIG_ENDIAN */
/* Read x[7] */
x3 = *__SIMD32(px);
px++;
px++;
/* Perform the multiply-accumulates */
acc0 = __SMLALD(x0, c0, acc0);
@ -383,7 +383,7 @@ void arm_correlate_q15(
/* Read x[9] */
x2 = _SIMD32_OFFSET(px + 1);
px += 2u;
px += 2u;
/* Perform the multiply-accumulates */
acc0 = __SMLALD(x0, c0, acc0);
@ -421,7 +421,7 @@ void arm_correlate_q15(
#endif /* #ifdef ARM_MATH_BIG_ENDIAN */
/* Read x[10] */
x3 = _SIMD32_OFFSET(px + 2);
px += 3u;
px += 3u;
/* Perform the multiply-accumulates */
acc0 = __SMLALDX(x1, c0, acc0);
@ -710,7 +710,7 @@ void arm_correlate_q15(
*pDst++ = (q15_t) __SSAT((sum >> 15u), 16u);
}
#endif /*#if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) */
#endif /* #if (defined(ARM_MATH_CM7) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) */
}

Loading…
Cancel
Save