DSP: Remove undefined behavior in post-increment/decrement read (#1443)

According to the C standard, q15_t * and const q15_t * are not compatible
types, which, among other things, implies that an object of type const
q15_t * can't be modified by writing through a q15_t ** or vice versa.
Programs doing this have undefined behavior.

Because of this rule, all programs using the functions read_q15x2_ia,
read_q15x2_da, read_q7x4_ia, or read_q7x4_da for reading data from an
array of constant elements have undefined behavior. To solve this it is
not enough to change the type of the function parameter, since this would
cause problems when reading data from an array of non-const elements. To
get a well-defined solution I needed to switch from functions to macros, so
that the increment is done in the original type of the pointer.
pull/19/head
Johan Bengtsson 4 years ago committed by GitHub
parent 8dcdb350a4
commit 065e8351c3

@ -74,7 +74,7 @@ extern "C"
@return Q31 value
*/
__STATIC_FORCEINLINE q31_t read_q15x2 (
q15_t * pQ15)
q15_t const * pQ15)
{
q31_t val;
@ -92,40 +92,14 @@ __STATIC_FORCEINLINE q31_t read_q15x2 (
@param[in] pQ15 points to input value
@return Q31 value
*/
/* Function form of read_q15x2_ia: reads through *pQ15, then advances *pQ15.
   It takes the address of a non-const q15_t pointer, so a caller holding a
   const q15_t * can only call it by casting that pointer's address to
   q15_t **. */
__STATIC_FORCEINLINE q31_t read_q15x2_ia (
q15_t ** pQ15)
{
q31_t val;
#ifdef __ARM_FEATURE_UNALIGNED
/* Copy the 4 bytes starting at *pQ15 directly into val.
   Presumably this macro is defined when the target permits unaligned
   access - confirm against the toolchain documentation. */
memcpy (&val, *pQ15, 4);
#else
/* Pack manually: element [1] goes into bits 31..16, element [0] (masked to
   16 bits) into bits 15..0.
   NOTE(review): if q15_t is a signed 16-bit type, left-shifting a negative
   (*pQ15)[1] is undefined in ISO C - confirm. */
val = ((*pQ15)[1] << 16) | ((*pQ15)[0] & 0x0FFFF);
#endif
/* Post-increment: move the caller's pointer past the two elements just read. */
*pQ15 += 2;
return (val);
}
/* Function-like macro rather than a function: the += is applied to *(pQ15) in
   the caller's own pointer type, so the address of either a q15_t * or a
   q15_t const * can be passed without a cast. pQ15 appears exactly once in
   the expansion. The pointer is advanced by 2 first; subtracting 2 from the
   result hands the pre-increment address to read_q15x2. */
#define read_q15x2_ia(pQ15) read_q15x2((*(pQ15) += 2) - 2)
/**
@brief Read 2 Q15 from Q15 pointer and decrement pointer afterwards.
@param[in] pQ15 points to input value
@return Q31 value
*/
/* Function form of read_q15x2_da: reads through *pQ15, then moves *pQ15 back.
   It takes the address of a non-const q15_t pointer, so a caller holding a
   const q15_t * can only call it by casting that pointer's address to
   q15_t **. */
__STATIC_FORCEINLINE q31_t read_q15x2_da (
q15_t ** pQ15)
{
q31_t val;
#ifdef __ARM_FEATURE_UNALIGNED
/* Copy the 4 bytes starting at *pQ15 directly into val.
   Presumably this macro is defined when the target permits unaligned
   access - confirm against the toolchain documentation. */
memcpy (&val, *pQ15, 4);
#else
/* Pack manually: element [1] goes into bits 31..16, element [0] (masked to
   16 bits) into bits 15..0.
   NOTE(review): if q15_t is a signed 16-bit type, left-shifting a negative
   (*pQ15)[1] is undefined in ISO C - confirm. */
val = ((*pQ15)[1] << 16) | ((*pQ15)[0] & 0x0FFFF);
#endif
/* Post-decrement: move the caller's pointer back by two elements after the read. */
*pQ15 -= 2;
return (val);
}
/* Function-like macro rather than a function: the -= is applied to *(pQ15) in
   the caller's own pointer type, so the address of either a q15_t * or a
   q15_t const * can be passed without a cast. pQ15 appears exactly once in
   the expansion. The pointer is moved back by 2 first; adding 2 to the
   result hands the pre-decrement address to read_q15x2. */
#define read_q15x2_da(pQ15) read_q15x2((*(pQ15) -= 2) + 2)
/**
@brief Write 2 Q15 to Q15 pointer and increment pointer afterwards.
@ -170,45 +144,36 @@ __STATIC_FORCEINLINE void write_q15x2 (
/**
@brief Read 4 Q7 from Q7 pointer and increment pointer afterwards.
@brief Read 4 Q7 from Q7 pointer
@param[in] pQ7 points to input value
@return Q31 value
*/
__STATIC_FORCEINLINE q31_t read_q7x4_ia (
q7_t ** pQ7)
__STATIC_FORCEINLINE q31_t read_q7x4 (
q7_t const * pQ7)
{
q31_t val;
#ifdef __ARM_FEATURE_UNALIGNED
memcpy (&val, *pQ7, 4);
memcpy (&val, pQ7, 4);
#else
val =(((*pQ7)[3] & 0x0FF) << 24) | (((*pQ7)[2] & 0x0FF) << 16) | (((*pQ7)[1] & 0x0FF) << 8) | ((*pQ7)[0] & 0x0FF);
val =((pQ7[3] & 0x0FF) << 24) | ((pQ7[2] & 0x0FF) << 16) | ((pQ7[1] & 0x0FF) << 8) | (pQ7[0] & 0x0FF);
#endif
*pQ7 += 4;
return (val);
}
/**
@brief Read 4 Q7 from Q7 pointer and decrement pointer afterwards.
@brief Read 4 Q7 from Q7 pointer and increment pointer afterwards.
@param[in] pQ7 points to input value
@return Q31 value
*/
__STATIC_FORCEINLINE q31_t read_q7x4_da (
q7_t ** pQ7)
{
q31_t val;
#ifdef __ARM_FEATURE_UNALIGNED
memcpy (&val, *pQ7, 4);
#else
val = ((((*pQ7)[3]) & 0x0FF) << 24) | ((((*pQ7)[2]) & 0x0FF) << 16) | ((((*pQ7)[1]) & 0x0FF) << 8) | ((*pQ7)[0] & 0x0FF);
#endif
*pQ7 -= 4;
#define read_q7x4_ia(pQ7) read_q7x4((*(pQ7) += 4) - 4)
return (val);
}
/**
@brief Read 4 Q7 from Q7 pointer and decrement pointer afterwards.
@param[in] pQ7 points to input value
@return Q31 value
*/
/* Function-like macro rather than a function: the -= is applied to *(pQ7) in
   the caller's own pointer type, so the address of either a q7_t * or a
   q7_t const * can be passed without a cast. pQ7 appears exactly once in
   the expansion. The pointer is moved back by 4 first; adding 4 to the
   result hands the pre-decrement address to read_q7x4. */
#define read_q7x4_da(pQ7) read_q7x4((*(pQ7) -= 4) + 4)
/**
@brief Write 4 Q7 to Q7 pointer and increment pointer afterwards.

@ -124,11 +124,11 @@ void arm_add_q15(
#if defined (ARM_MATH_DSP)
/* read 2 times 2 samples at a time from sourceA */
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
inA1 = read_q15x2_ia (&pSrcA);
inA2 = read_q15x2_ia (&pSrcA);
/* read 2 times 2 samples at a time from sourceB */
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
inB1 = read_q15x2_ia (&pSrcB);
inB2 = read_q15x2_ia (&pSrcB);
/* Add and store 2 times 2 samples at a time */
write_q15x2_ia (&pDst, __QADD16(inA1, inB1));

@ -119,7 +119,7 @@ void arm_add_q7(
#if defined (ARM_MATH_DSP)
/* Add and store result in destination buffer (4 samples at a time). */
write_q7x4_ia (&pDst, __QADD8 (read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
write_q7x4_ia (&pDst, __QADD8 (read_q7x4_ia (&pSrcA), read_q7x4_ia (&pSrcB)));
#else
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT ((q15_t) *pSrcA++ + *pSrcB++, 8);

@ -124,8 +124,8 @@ void arm_dot_prod_q15(
#if defined (ARM_MATH_DSP)
/* Calculate dot product and store result in a temporary buffer. */
sum = __SMLALD(read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB), sum);
sum = __SMLALD(read_q15x2_ia ((q15_t **) &pSrcA), read_q15x2_ia ((q15_t **) &pSrcB), sum);
sum = __SMLALD(read_q15x2_ia (&pSrcA), read_q15x2_ia (&pSrcB), sum);
sum = __SMLALD(read_q15x2_ia (&pSrcA), read_q15x2_ia (&pSrcB), sum);
#else
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);
sum += (q63_t)((q31_t) *pSrcA++ * *pSrcB++);

@ -129,9 +129,9 @@ void arm_dot_prod_q7(
#if defined (ARM_MATH_DSP)
/* read 4 samples at a time from sourceA */
input1 = read_q7x4_ia ((q7_t **) &pSrcA);
input1 = read_q7x4_ia (&pSrcA);
/* read 4 samples at a time from sourceB */
input2 = read_q7x4_ia ((q7_t **) &pSrcB);
input2 = read_q7x4_ia (&pSrcB);
/* extract two q7_t samples to q15_t samples */
inA1 = __SXTB16(__ROR(input1, 8));

@ -123,13 +123,13 @@ void arm_mult_q15(
#if defined (ARM_MATH_DSP)
/* read 2 samples at a time from sourceA */
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
inA1 = read_q15x2_ia (&pSrcA);
/* read 2 samples at a time from sourceB */
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
inB1 = read_q15x2_ia (&pSrcB);
/* read 2 samples at a time from sourceA */
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
inA2 = read_q15x2_ia (&pSrcA);
/* read 2 samples at a time from sourceB */
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
inB2 = read_q15x2_ia (&pSrcB);
/* multiply mul = sourceA * sourceB */
mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));

@ -118,10 +118,10 @@ void arm_negate_q15(
#if defined (ARM_MATH_DSP)
/* Negate and store result in destination buffer (2 samples at a time). */
in1 = read_q15x2_ia ((q15_t **) &pSrc);
in1 = read_q15x2_ia (&pSrc);
write_q15x2_ia (&pDst, __QSUB16(0, in1));
in1 = read_q15x2_ia ((q15_t **) &pSrc);
in1 = read_q15x2_ia (&pSrc);
write_q15x2_ia (&pDst, __QSUB16(0, in1));
#else
in = *pSrc++;

@ -116,7 +116,7 @@ void arm_negate_q7(
#if defined (ARM_MATH_DSP)
/* Negate and store result in destination buffer (4 samples at a time). */
in1 = read_q7x4_ia ((q7_t **) &pSrc);
in1 = read_q7x4_ia (&pSrc);
write_q7x4_ia (&pDst, __QSUB8(0, in1));
#else
in = *pSrc++;

@ -122,8 +122,8 @@ void arm_offset_q15(
#if defined (ARM_MATH_DSP)
/* Add offset and store result in destination buffer (2 samples at a time). */
write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia ((q15_t **) &pSrc), offset_packed));
write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia ((q15_t **) &pSrc), offset_packed));
write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia (&pSrc), offset_packed));
write_q15x2_ia (&pDst, __QADD16(read_q15x2_ia (&pSrc), offset_packed));
#else
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);
*pDst++ = (q15_t) __SSAT(((q31_t) *pSrc++ + offset), 16);

@ -121,7 +121,7 @@ void arm_offset_q7(
#if defined (ARM_MATH_DSP)
/* Add offset and store result in destination buffer (4 samples at a time). */
write_q7x4_ia (&pDst, __QADD8(read_q7x4_ia ((q7_t **) &pSrc), offset_packed));
write_q7x4_ia (&pDst, __QADD8(read_q7x4_ia (&pSrc), offset_packed));
#else
*pDst++ = (q7_t) __SSAT((q15_t) *pSrc++ + offset, 8);
*pDst++ = (q7_t) __SSAT((q15_t) *pSrc++ + offset, 8);

@ -136,8 +136,8 @@ void arm_scale_q15(
#if defined (ARM_MATH_DSP)
/* read 2 times 2 samples at a time from source */
inA1 = read_q15x2_ia ((q15_t **) &pSrc);
inA2 = read_q15x2_ia ((q15_t **) &pSrc);
inA1 = read_q15x2_ia (&pSrc);
inA2 = read_q15x2_ia (&pSrc);
/* Scale inputs and store result in temporary variables
* in single cycle by packing the outputs */

@ -125,11 +125,11 @@ void arm_sub_q15(
#if defined (ARM_MATH_DSP)
/* read 2 times 2 samples at a time from sourceA */
inA1 = read_q15x2_ia ((q15_t **) &pSrcA);
inA2 = read_q15x2_ia ((q15_t **) &pSrcA);
inA1 = read_q15x2_ia (&pSrcA);
inA2 = read_q15x2_ia (&pSrcA);
/* read 2 times 2 samples at a time from sourceB */
inB1 = read_q15x2_ia ((q15_t **) &pSrcB);
inB2 = read_q15x2_ia ((q15_t **) &pSrcB);
inB1 = read_q15x2_ia (&pSrcB);
inB2 = read_q15x2_ia (&pSrcB);
/* Subtract and store 2 times 2 samples at a time */
write_q15x2_ia (&pDst, __QSUB16(inA1, inB1));

@ -117,7 +117,7 @@ void arm_sub_q7(
#if defined (ARM_MATH_DSP)
/* Subtract and store result in destination buffer (4 samples at a time). */
write_q7x4_ia (&pDst, __QSUB8(read_q7x4_ia ((q7_t **) &pSrcA), read_q7x4_ia ((q7_t **) &pSrcB)));
write_q7x4_ia (&pDst, __QSUB8(read_q7x4_ia (&pSrcA), read_q7x4_ia (&pSrcB)));
#else
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);
*pDst++ = (q7_t) __SSAT((q15_t) *pSrcA++ - *pSrcB++, 8);

@ -122,11 +122,11 @@ void arm_cmplx_conj_q15(
/* Calculate Complex Conjugate and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
in1 = read_q15x2_ia ((q15_t **) &pSrc);
in2 = read_q15x2_ia ((q15_t **) &pSrc);
in3 = read_q15x2_ia ((q15_t **) &pSrc);
in4 = read_q15x2_ia ((q15_t **) &pSrc);
#if defined (ARM_MATH_DSP)
in1 = read_q15x2_ia (&pSrc);
in2 = read_q15x2_ia (&pSrc);
in3 = read_q15x2_ia (&pSrc);
in4 = read_q15x2_ia (&pSrc);
#ifndef ARM_MATH_BIG_ENDIAN
in1 = __QASX(0, in1);

@ -133,20 +133,20 @@ void arm_cmplx_mag_fast_q15(
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
#if defined (ARM_MATH_DSP)
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
/* store result in 2.14 format in destination buffer. */
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
#else
@ -196,7 +196,7 @@ void arm_cmplx_mag_fast_q15(
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
#if defined (ARM_MATH_DSP)
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
/* store result in 2.14 format in destination buffer. */

@ -167,23 +167,23 @@ void arm_cmplx_mag_q15(
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
#if defined (ARM_MATH_DSP)
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
/* store result in 2.14 format in destination buffer. */
arm_sqrt_q31(acc0 >> 1 , &res);
*pDst++ = res >> 16;
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
arm_sqrt_q31(acc0 >> 1 , &res);
*pDst++ = res >> 16;
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
arm_sqrt_q31(acc0 >> 1 , &res);
*pDst++ = res >> 16;
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
arm_sqrt_q31(acc0 >> 1 , &res);
*pDst++ = res >> 16;
@ -238,7 +238,7 @@ void arm_cmplx_mag_q15(
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
#if defined (ARM_MATH_DSP)
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
/* store result in 2.14 format in destination buffer. */

@ -131,20 +131,20 @@ void arm_cmplx_mag_squared_q15(
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
#if defined (ARM_MATH_DSP)
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
/* store result in 3.13 format in destination buffer. */
*pDst++ = (q15_t) (acc0 >> 17);
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
*pDst++ = (q15_t) (acc0 >> 17);
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
*pDst++ = (q15_t) (acc0 >> 17);
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
*pDst++ = (q15_t) (acc0 >> 17);
#else
@ -193,7 +193,7 @@ void arm_cmplx_mag_squared_q15(
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
#if defined (ARM_MATH_DSP)
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
acc0 = __SMUAD(in, in);
/* store result in 3.13 format in destination buffer. */

@ -133,10 +133,10 @@ void arm_cmplx_mult_real_q15(
#if defined (ARM_MATH_DSP)
/* read 2 complex numbers both real and imaginary from complex input buffer */
inA1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
inA2 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
inA1 = read_q15x2_ia (&pSrcCmplx);
inA2 = read_q15x2_ia (&pSrcCmplx);
/* read 2 real values at a time from real input buffer */
inB1 = read_q15x2_ia ((q15_t **) &pSrcReal);
inB1 = read_q15x2_ia (&pSrcReal);
/* multiply complex number with real numbers */
#ifndef ARM_MATH_BIG_ENDIAN
@ -161,9 +161,9 @@ void arm_cmplx_mult_real_q15(
write_q15x2_ia (&pCmplxDst, __PKHBT(out1, out2, 16));
write_q15x2_ia (&pCmplxDst, __PKHBT(out3, out4, 16));
inA1 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
inA2 = read_q15x2_ia ((q15_t **) &pSrcCmplx);
inB1 = read_q15x2_ia ((q15_t **) &pSrcReal);
inA1 = read_q15x2_ia (&pSrcCmplx);
inA2 = read_q15x2_ia (&pSrcCmplx);
inB1 = read_q15x2_ia (&pSrcReal);
#ifndef ARM_MATH_BIG_ENDIAN
mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));

@ -79,13 +79,13 @@ void arm_biquad_cascade_df1_fast_q15(
do
{
/* Read the b0 and 0 coefficients using SIMD */
b0 = read_q15x2_ia ((q15_t **) &pCoeffs);
b0 = read_q15x2_ia (&pCoeffs);
/* Read the b1 and b2 coefficients using SIMD */
b1 = read_q15x2_ia ((q15_t **) &pCoeffs);
b1 = read_q15x2_ia (&pCoeffs);
/* Read the a1 and a2 coefficients using SIMD */
a1 = read_q15x2_ia ((q15_t **) &pCoeffs);
a1 = read_q15x2_ia (&pCoeffs);
/* Read the input state values from the state buffer: x[n-1], x[n-2] */
state_in = read_q15x2_ia (&pState);
@ -109,7 +109,7 @@ void arm_biquad_cascade_df1_fast_q15(
{
/* Read the input */
in = read_q15x2_ia ((q15_t **) &pIn);
in = read_q15x2_ia (&pIn);
/* out = b0 * x[n] + 0 * 0 */
out = __SMUAD(b0, in);

@ -525,7 +525,7 @@ void arm_fir_q15(
while (tapCnt > 0U)
{
/* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */
c0 = read_q15x2_ia ((q15_t **) &pb);
c0 = read_q15x2_ia (&pb);
/* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
acc0 = __SMLALD(x0, c0, acc0);
@ -557,7 +557,7 @@ void arm_fir_q15(
acc3 = __SMLALDX(x1, c0, acc3);
/* Read coefficients b[N-2], b[N-3] */
c0 = read_q15x2_ia ((q15_t **) &pb);
c0 = read_q15x2_ia (&pb);
/* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
acc0 = __SMLALD(x2, c0, acc0);
@ -590,7 +590,7 @@ void arm_fir_q15(
if ((numTaps & 0x3U) != 0U)
{
/* Read last two coefficients */
c0 = read_q15x2_ia ((q15_t **) &pb);
c0 = read_q15x2_ia (&pb);
/* Perform the multiply-accumulates */
acc0 = __SMLALD(x0, c0, acc0);

@ -461,8 +461,8 @@ arm_status arm_mat_cmplx_mult_q15(
#if defined (ARM_MATH_DSP)
/* read real and imag values from pSrcA and pSrcB buffer */
pSourceA = read_q15x2_ia ((q15_t **) &pInA);
pSourceB = read_q15x2_ia ((q15_t **) &pInB);
pSourceA = read_q15x2_ia (&pInA);
pSourceB = read_q15x2_ia (&pInB);
/* Multiply and Accumlates */
#ifdef ARM_MATH_BIG_ENDIAN
@ -475,8 +475,8 @@ arm_status arm_mat_cmplx_mult_q15(
sumImag += (q63_t) prod2;
/* read real and imag values from pSrcA and pSrcB buffer */
pSourceA = read_q15x2_ia ((q15_t **) &pInA);
pSourceB = read_q15x2_ia ((q15_t **) &pInB);
pSourceA = read_q15x2_ia (&pInA);
pSourceB = read_q15x2_ia (&pInB);
/* Multiply and Accumlates */
#ifdef ARM_MATH_BIG_ENDIAN
@ -534,8 +534,8 @@ arm_status arm_mat_cmplx_mult_q15(
#if defined (ARM_MATH_DSP)
/* read real and imag values from pSrcA and pSrcB buffer */
pSourceA = read_q15x2_ia ((q15_t **) &pInA);
pSourceB = read_q15x2_ia ((q15_t **) &pInB);
pSourceA = read_q15x2_ia (&pInA);
pSourceB = read_q15x2_ia (&pInB);
/* Multiply and Accumlates */
#ifdef ARM_MATH_BIG_ENDIAN

@ -125,7 +125,7 @@ arm_status arm_mat_mult_fast_q15(
#if defined (ARM_MATH_DSP)
/* Read two elements from row */
in = read_q15x2_ia ((q15_t **) &pInB);
in = read_q15x2_ia (&pInB);
/* Unpack and store one element in destination */
#ifndef ARM_MATH_BIG_ENDIAN
@ -147,7 +147,7 @@ arm_status arm_mat_mult_fast_q15(
/* Update pointer px to point to next row of transposed matrix */
px += numRowsB;
in = read_q15x2_ia ((q15_t **) &pInB);
in = read_q15x2_ia (&pInB);
#ifndef ARM_MATH_BIG_ENDIAN
*px = (q15_t) in;
#else
@ -271,11 +271,11 @@ arm_status arm_mat_mult_fast_q15(
#if defined (ARM_MATH_DSP)
/* read real and imag values from pSrcA and pSrcB buffer */
inA1 = read_q15x2_ia ((q15_t **) &pInA);
inB1 = read_q15x2_ia ((q15_t **) &pInB);
inA1 = read_q15x2_ia (&pInA);
inB1 = read_q15x2_ia (&pInB);
inA2 = read_q15x2_ia ((q15_t **) &pInA2);
inB2 = read_q15x2_ia ((q15_t **) &pInB2);
inA2 = read_q15x2_ia (&pInA2);
inB2 = read_q15x2_ia (&pInB2);
/* Multiply and Accumulates */
sum = __SMLAD(inA1, inB1, sum);
@ -389,10 +389,10 @@ arm_status arm_mat_mult_fast_q15(
/* matrix multiplication */
while (colCnt > 0U)
{
inA1 = read_q15x2_ia ((q15_t **) &pInA);
inA2 = read_q15x2_ia ((q15_t **) &pInA);
inB1 = read_q15x2_ia ((q15_t **) &pInB);
inB2 = read_q15x2_ia ((q15_t **) &pInB);
inA1 = read_q15x2_ia (&pInA);
inA2 = read_q15x2_ia (&pInA);
inB1 = read_q15x2_ia (&pInB);
inB2 = read_q15x2_ia (&pInB);
sum = __SMLAD(inA1, inB1, sum);
sum = __SMLAD(inA2, inB2, sum);
@ -441,10 +441,10 @@ arm_status arm_mat_mult_fast_q15(
/* matrix multiplication */
while (colCnt > 0U)
{
inA1 = read_q15x2_ia ((q15_t **) &pInA);
inA2 = read_q15x2_ia ((q15_t **) &pInA);
inB1 = read_q15x2_ia ((q15_t **) &pInB);
inB2 = read_q15x2_ia ((q15_t **) &pInB);
inA1 = read_q15x2_ia (&pInA);
inA2 = read_q15x2_ia (&pInA);
inB1 = read_q15x2_ia (&pInB);
inB2 = read_q15x2_ia (&pInB);
sum = __SMLAD(inA1, inB1, sum);
sum = __SMLAD(inA2, inB2, sum);

@ -695,11 +695,11 @@ arm_status arm_mat_mult_q15(
/* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */
/* read real and imag values from pSrcA and pSrcB buffer */
inA1 = read_q15x2_ia ((q15_t **) &pInA);
inB1 = read_q15x2_ia ((q15_t **) &pInB);
inA1 = read_q15x2_ia (&pInA);
inB1 = read_q15x2_ia (&pInB);
inA2 = read_q15x2_ia ((q15_t **) &pInA);
inB2 = read_q15x2_ia ((q15_t **) &pInB);
inA2 = read_q15x2_ia (&pInA);
inB2 = read_q15x2_ia (&pInB);
/* Multiply and Accumulates */
sum = __SMLALD(inA1, inB1, sum);

@ -177,8 +177,8 @@ arm_status arm_mat_scale_q15(
#if defined (ARM_MATH_DSP)
/* read 2 times 2 samples at a time from source */
inA1 = read_q15x2_ia ((q15_t **) &pIn);
inA2 = read_q15x2_ia ((q15_t **) &pIn);
inA1 = read_q15x2_ia (&pIn);
inA2 = read_q15x2_ia (&pIn);
/* Scale inputs and store result in temporary variables
* in single cycle by packing the outputs */

@ -167,8 +167,8 @@ arm_status arm_mat_sub_q15(
/* Subtract, Saturate and store result in destination buffer. */
#if defined (ARM_MATH_DSP)
write_q15x2_ia (&pOut, __QSUB16(read_q15x2_ia ((q15_t **) &pInA), read_q15x2_ia ((q15_t **) &pInB)));
write_q15x2_ia (&pOut, __QSUB16(read_q15x2_ia ((q15_t **) &pInA), read_q15x2_ia ((q15_t **) &pInB)));
write_q15x2_ia (&pOut, __QSUB16(read_q15x2_ia (&pInA), read_q15x2_ia (&pInB)));
write_q15x2_ia (&pOut, __QSUB16(read_q15x2_ia (&pInA), read_q15x2_ia (&pInB)));
#else
*pOut++ = (q15_t) __SSAT(((q31_t) * pInA++ - *pInB++), 16);
*pOut++ = (q15_t) __SSAT(((q31_t) * pInA++ - *pInB++), 16);

@ -140,7 +140,7 @@ arm_status arm_mat_trans_q15(
while (col > 0U) /* column loop */
{
/* Read two elements from row */
in = read_q15x2_ia ((q15_t **) &pIn);
in = read_q15x2_ia (&pIn);
/* Unpack and store one element in destination */
#ifndef ARM_MATH_BIG_ENDIAN
@ -163,7 +163,7 @@ arm_status arm_mat_trans_q15(
pOut += nRows;
/* Read two elements from row */
in = read_q15x2_ia ((q15_t **) &pIn);
in = read_q15x2_ia (&pIn);
/* Unpack and store one element in destination */
#ifndef ARM_MATH_BIG_ENDIAN

@ -311,16 +311,16 @@ void arm_mat_vec_mult_q15(const arm_matrix_instance_q15 *pSrcMat, const q15_t *p
// Main loop: matrix-vector multiplication
while (colCnt > 0u) {
// Read 2 values from vector
vecData = read_q15x2_ia ((q15_t **) &pInVec);
vecData = read_q15x2_ia (&pInVec);
// Read 8 values from the matrix - 2 values from each of 4 rows, and do multiply accumulate
matData = read_q15x2_ia ((q15_t **) &pInA1);
matData = read_q15x2_ia (&pInA1);
sum1 = __SMLALD(matData, vecData, sum1);
matData = read_q15x2_ia ((q15_t **) &pInA2);
matData = read_q15x2_ia (&pInA2);
sum2 = __SMLALD(matData, vecData, sum2);
matData = read_q15x2_ia ((q15_t **) &pInA3);
matData = read_q15x2_ia (&pInA3);
sum3 = __SMLALD(matData, vecData, sum3);
matData = read_q15x2_ia ((q15_t **) &pInA4);
matData = read_q15x2_ia (&pInA4);
sum4 = __SMLALD(matData, vecData, sum4);
// Decrement the loop counter
@ -361,10 +361,10 @@ void arm_mat_vec_mult_q15(const arm_matrix_instance_q15 *pSrcMat, const q15_t *p
colCnt = numCols >> 2;
while (colCnt > 0) {
vecData = read_q15x2_ia ((q15_t **) &pInVec);
vecData2 = read_q15x2_ia ((q15_t **) &pInVec);
matData = read_q15x2_ia ((q15_t **) &pInA1);
matData2 = read_q15x2_ia ((q15_t **) &pInA1);
vecData = read_q15x2_ia (&pInVec);
vecData2 = read_q15x2_ia (&pInVec);
matData = read_q15x2_ia (&pInA1);
matData2 = read_q15x2_ia (&pInA1);
sum = __SMLALD(matData, vecData, sum);
sum = __SMLALD(matData2, vecData2, sum);
colCnt--;

@ -325,26 +325,26 @@ void arm_mat_vec_mult_q7(const arm_matrix_instance_q7 *pSrcMat, const q7_t *pVec
while (colCnt > 0u) {
// Read 4 values from vector
vecData = read_q7x4_ia ((q7_t **) &pInVec);
vecData = read_q7x4_ia (&pInVec);
vecData2 = __SXTB16(__ROR(vecData, 8));
vecData = __SXTB16(vecData);
// Read 16 values from the matrix - 4 values from each of 4 rows, and do multiply accumulate
matData = read_q7x4_ia ((q7_t **) &pInA1);
matData = read_q7x4_ia (&pInA1);
matData2 = __SXTB16(__ROR(matData, 8));
matData = __SXTB16(matData);
sum1 = __SMLAD(matData, vecData, sum1);
sum1 = __SMLAD(matData2, vecData2, sum1);
matData = read_q7x4_ia ((q7_t **) &pInA2);
matData = read_q7x4_ia (&pInA2);
matData2 = __SXTB16(__ROR(matData, 8));
matData = __SXTB16(matData);
sum2 = __SMLAD(matData, vecData, sum2);
sum2 = __SMLAD(matData2, vecData2, sum2);
matData = read_q7x4_ia ((q7_t **) &pInA3);
matData = read_q7x4_ia (&pInA3);
matData2 = __SXTB16(__ROR(matData, 8));
matData = __SXTB16(matData);
sum3 = __SMLAD(matData, vecData, sum3);
sum3 = __SMLAD(matData2, vecData2, sum3);
matData = read_q7x4_ia ((q7_t **) &pInA4);
matData = read_q7x4_ia (&pInA4);
matData2 = __SXTB16(__ROR(matData, 8));
matData = __SXTB16(matData);
sum4 = __SMLAD(matData, vecData, sum4);
@ -391,10 +391,10 @@ void arm_mat_vec_mult_q7(const arm_matrix_instance_q7 *pSrcMat, const q7_t *pVec
colCnt = numCols >> 2;
while (colCnt > 0) {
vecData = read_q7x4_ia ((q7_t **) &pInVec);
vecData = read_q7x4_ia (&pInVec);
vecData2 = __SXTB16(__ROR(vecData, 8));
vecData = __SXTB16(vecData);
matData = read_q7x4_ia ((q7_t **) &pInA1);
matData = read_q7x4_ia (&pInA1);
matData2 = __SXTB16(__ROR(matData, 8));
matData = __SXTB16(matData);
sum = __SMLAD(matData, vecData, sum);

@ -114,11 +114,11 @@ void arm_mean_q15(
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
sum += ((in << 16U) >> 16U);
sum += (in >> 16U);
in = read_q15x2_ia ((q15_t **) &pSrc);
in = read_q15x2_ia (&pSrc);
sum += ((in << 16U) >> 16U);
sum += (in >> 16U);

@ -113,7 +113,7 @@ void arm_mean_q7(
while (blkCnt > 0U)
{
/* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
in = read_q7x4_ia ((q7_t **) &pSrc);
in = read_q7x4_ia (&pSrc);
sum += ((in << 24U) >> 24U);
sum += ((in << 16U) >> 24U);
sum += ((in << 8U) >> 24U);

@ -122,10 +122,10 @@ void arm_power_q15(
/* Compute Power and store result in a temporary variable, sum. */
#if defined (ARM_MATH_DSP)
in32 = read_q15x2_ia ((q15_t **) &pSrc);
in32 = read_q15x2_ia (&pSrc);
sum = __SMLALD(in32, in32, sum);
in32 = read_q15x2_ia ((q15_t **) &pSrc);
in32 = read_q15x2_ia (&pSrc);
sum = __SMLALD(in32, in32, sum);
#else
in = *pSrc++;

@ -122,7 +122,7 @@ void arm_power_q7(
/* Compute Power and store result in a temporary variable, sum. */
#if defined (ARM_MATH_DSP)
in32 = read_q7x4_ia ((q7_t **) &pSrc);
in32 = read_q7x4_ia (&pSrc);
in1 = __SXTB16(__ROR(in32, 8));
in2 = __SXTB16(in32);

@ -93,10 +93,10 @@ void arm_rms_q15(
/* Compute sum of squares and store result in a temporary variable. */
#if defined (ARM_MATH_DSP)
in32 = read_q15x2_ia ((q15_t **) &pSrc);
in32 = read_q15x2_ia (&pSrc);
sum = __SMLALD(in32, in32, sum);
in32 = read_q15x2_ia ((q15_t **) &pSrc);
in32 = read_q15x2_ia (&pSrc);
sum = __SMLALD(in32, in32, sum);
#else
in = *pSrc++;

@ -100,12 +100,12 @@ void arm_std_q15(
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
/* Compute sum and store result in a temporary variable, sum. */
#if defined (ARM_MATH_DSP)
in32 = read_q15x2_ia ((q15_t **) &pSrc);
in32 = read_q15x2_ia (&pSrc);
sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
sum += ((in32 << 16U) >> 16U);
sum += (in32 >> 16U);
in32 = read_q15x2_ia ((q15_t **) &pSrc);
in32 = read_q15x2_ia (&pSrc);
sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
sum += ((in32 << 16U) >> 16U);
sum += (in32 >> 16U);

@ -154,12 +154,12 @@ void arm_var_q15(
/* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
/* Compute sum and store result in a temporary variable, sum. */
#if defined (ARM_MATH_DSP)
in32 = read_q15x2_ia ((q15_t **) &pSrc);
in32 = read_q15x2_ia (&pSrc);
sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
sum += ((in32 << 16U) >> 16U);
sum += (in32 >> 16U);
in32 = read_q15x2_ia ((q15_t **) &pSrc);
in32 = read_q15x2_ia (&pSrc);
sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
sum += ((in32 << 16U) >> 16U);
sum += (in32 >> 16U);

@ -95,8 +95,8 @@ void arm_copy_q15(
/* C = A */
/* read 2 times 2 samples at a time */
write_q15x2_ia (&pDst, read_q15x2_ia ((q15_t **) &pSrc));
write_q15x2_ia (&pDst, read_q15x2_ia ((q15_t **) &pSrc));
write_q15x2_ia (&pDst, read_q15x2_ia (&pSrc));
write_q15x2_ia (&pDst, read_q15x2_ia (&pSrc));
/* Decrement loop counter */
blkCnt--;

@ -98,7 +98,7 @@ void arm_copy_q7(
/* C = A */
/* read 4 samples at a time */
write_q7x4_ia (&pDst, read_q7x4_ia ((q7_t **) &pSrc));
write_q7x4_ia (&pDst, read_q7x4_ia (&pSrc));
/* Decrement loop counter */
blkCnt--;

@ -117,8 +117,8 @@ void arm_q15_to_q31(
/* C = (q31_t)A << 16 */
/* Convert from q15 to q31 and store result in destination buffer */
in1 = read_q15x2_ia ((q15_t **) &pIn);
in2 = read_q15x2_ia ((q15_t **) &pIn);
in1 = read_q15x2_ia (&pIn);
in2 = read_q15x2_ia (&pIn);
#ifndef ARM_MATH_BIG_ENDIAN

@ -119,8 +119,8 @@ void arm_q15_to_q7(
/* Convert from q15 to q7 and store result in destination buffer */
#if defined (ARM_MATH_DSP)
in1 = read_q15x2_ia ((q15_t **) &pIn);
in2 = read_q15x2_ia ((q15_t **) &pIn);
in1 = read_q15x2_ia (&pIn);
in2 = read_q15x2_ia (&pIn);
#ifndef ARM_MATH_BIG_ENDIAN

@ -121,7 +121,7 @@ void arm_q7_to_q15(
/* Convert from q7 to q15 and store result in destination buffer */
#if defined (ARM_MATH_DSP)
in = read_q7x4_ia ((q7_t **) &pIn);
in = read_q7x4_ia (&pIn);
/* rotatate in by 8 and extend two q7_t values to q15_t values */
in1 = __SXTB16(__ROR(in, 8));

@ -113,7 +113,7 @@ void arm_q7_to_q31(
/* C = (q31_t) A << 24 */
/* Convert from q7 to q31 and store result in destination buffer */
in = read_q7x4_ia ((q7_t **) &pIn);
in = read_q7x4_ia (&pIn);
#ifndef ARM_MATH_BIG_ENDIAN

@ -718,7 +718,7 @@ void arm_cfft_radix4by2_q15(
for (i = n2; i > 0; i--)
{
coeff = read_q15x2_ia ((q15_t **) &pC);
coeff = read_q15x2_ia (&pC);
T = read_q15x2 (pSi);
T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
@ -817,7 +817,7 @@ void arm_cfft_radix4by2_inverse_q15(
for (i = n2; i > 0; i--)
{
coeff = read_q15x2_ia ((q15_t **) &pC);
coeff = read_q15x2_ia (&pC);
T = read_q15x2 (pSi);
T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */

@ -495,16 +495,16 @@ void arm_radix4_butterfly_q15(
do
{
/* Read xa (real), ya(imag) input */
xaya = read_q15x2_ia ((q15_t **) &ptr1);
xaya = read_q15x2_ia (&ptr1);
/* Read xb (real), yb(imag) input */
xbyb = read_q15x2_ia ((q15_t **) &ptr1);
xbyb = read_q15x2_ia (&ptr1);
/* Read xc (real), yc(imag) input */
xcyc = read_q15x2_ia ((q15_t **) &ptr1);
xcyc = read_q15x2_ia (&ptr1);
/* Read xd (real), yd(imag) input */
xdyd = read_q15x2_ia ((q15_t **) &ptr1);
xdyd = read_q15x2_ia (&ptr1);
/* R = packed((ya + yc), (xa + xc)) */
R = __QADD16(xaya, xcyc);
@ -1358,16 +1358,16 @@ void arm_radix4_butterfly_inverse_q15(
do
{
/* Read xa (real), ya(imag) input */
xaya = read_q15x2_ia ((q15_t **) &ptr1);
xaya = read_q15x2_ia (&ptr1);
/* Read xb (real), yb(imag) input */
xbyb = read_q15x2_ia ((q15_t **) &ptr1);
xbyb = read_q15x2_ia (&ptr1);
/* Read xc (real), yc(imag) input */
xcyc = read_q15x2_ia ((q15_t **) &ptr1);
xcyc = read_q15x2_ia (&ptr1);
/* Read xd (real), yd(imag) input */
xdyd = read_q15x2_ia ((q15_t **) &ptr1);
xdyd = read_q15x2_ia (&ptr1);
/* R = packed((ya + yc), (xa + xc)) */
R = __QADD16(xaya, xcyc);

Loading…
Cancel
Save