Global MISRA-C Rule 10.6 fix-up: unsigned constants now carry an uppercase 'U' suffix instead of a lowercase 'u'. (Issue #227)

pull/19/head
Jonatan Antoni 9 years ago
parent b665acba90
commit 2208df0e14

@ -151,8 +151,8 @@
/* TYPE_FROM_ABBREV(q15), \ */
/* ifft_flag) */
CFFT_FAMILY_DEFINE_ALL_TESTS(forward, 0u);
CFFT_FAMILY_DEFINE_ALL_TESTS(inverse, 1u);
CFFT_FAMILY_DEFINE_ALL_TESTS(forward, 0U);
CFFT_FAMILY_DEFINE_ALL_TESTS(inverse, 1U);
/*--------------------------------------------------------------------------------*/
/* Collect all tests in a group */

@ -61,8 +61,8 @@ FFT fast function test template. Arguments are: function configuration suffix
return JTEST_TEST_PASSED; \
}
RFFT_FAST_DEFINE_TEST(forward, 0u);
RFFT_FAST_DEFINE_TEST(inverse, 1u);
RFFT_FAST_DEFINE_TEST(forward, 0U);
RFFT_FAST_DEFINE_TEST(inverse, 1U);
/*--------------------------------------------------------------------------------*/
/* Collect all tests in a group */

@ -26,11 +26,11 @@
/* Initialize the RFFT and CFFT Instances */ \
arm_rfft_init_##suffix( \
&rfft_inst_fut, \
(uint32_t) fftlen, ifft_flag, 1u); \
(uint32_t) fftlen, ifft_flag, 1U); \
\
arm_rfft_init_##suffix( \
&rfft_inst_ref, \
(uint32_t) fftlen, ifft_flag, 1u); \
(uint32_t) fftlen, ifft_flag, 1U); \
\
if (ifft_flag) \
{ \
@ -74,11 +74,11 @@
return JTEST_TEST_PASSED; \
}
RFFT_DEFINE_TEST(q31, forward, 0u, TYPE_FROM_ABBREV(q31), TYPE_FROM_ABBREV(q31));
RFFT_DEFINE_TEST(q15, forward, 0u, TYPE_FROM_ABBREV(q15), TYPE_FROM_ABBREV(q15));
//RFFT_DEFINE_TEST(f32, inverse, 1u, TYPE_FROM_ABBREV(f32), TYPE_FROM_ABBREV(f32));
RFFT_DEFINE_TEST(q31, inverse, 1u, TYPE_FROM_ABBREV(q31), TYPE_FROM_ABBREV(q31));
RFFT_DEFINE_TEST(q15, inverse, 1u, TYPE_FROM_ABBREV(q15), TYPE_FROM_ABBREV(q15));
RFFT_DEFINE_TEST(q31, forward, 0U, TYPE_FROM_ABBREV(q31), TYPE_FROM_ABBREV(q31));
RFFT_DEFINE_TEST(q15, forward, 0U, TYPE_FROM_ABBREV(q15), TYPE_FROM_ABBREV(q15));
//RFFT_DEFINE_TEST(f32, inverse, 1U, TYPE_FROM_ABBREV(f32), TYPE_FROM_ABBREV(f32));
RFFT_DEFINE_TEST(q31, inverse, 1U, TYPE_FROM_ABBREV(q31), TYPE_FROM_ABBREV(q31));
RFFT_DEFINE_TEST(q15, inverse, 1U, TYPE_FROM_ABBREV(q15), TYPE_FROM_ABBREV(q15));
/*--------------------------------------------------------------------------------*/
/* Collect all tests in a group */

@ -35,7 +35,7 @@ q31_t ref_pid_q31(
acc += (q63_t) S->A2 * S->state[1];
/* convert output to 1.31 format to add y[n-1] */
out = (q31_t) (acc >> 31u);
out = (q31_t) (acc >> 31U);
/* out += y[n-1] */
out += S->state[2];

@ -31,7 +31,7 @@ void ref_biquad_cascade_df2T_f32(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -66,7 +66,7 @@ void ref_biquad_cascade_df2T_f32(
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
}
@ -103,7 +103,7 @@ void ref_biquad_cascade_stereo_df2T_f32(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn1a = *pIn++; //Channel a
@ -145,7 +145,7 @@ void ref_biquad_cascade_stereo_df2T_f32(
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
}
@ -180,7 +180,7 @@ void ref_biquad_cascade_df2T_f64(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -215,7 +215,7 @@ void ref_biquad_cascade_df2T_f64(
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
}
void ref_biquad_cascade_df1_f32(
@ -255,7 +255,7 @@ void ref_biquad_cascade_df1_f32(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -297,7 +297,7 @@ void ref_biquad_cascade_df1_f32(
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
}
void ref_biquad_cas_df1_32x64_q31(
@ -318,8 +318,8 @@ void ref_biquad_cas_df1_32x64_q31(
int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */
uint32_t sample, stage = S->numStages; /* loop counters */
q31_t acc_l, acc_h; /* temporary output */
uint32_t uShift = ((uint32_t) S->postShift + 1u);
uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
uint32_t uShift = ((uint32_t) S->postShift + 1U);
uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */
do
{
@ -338,7 +338,7 @@ void ref_biquad_cas_df1_32x64_q31(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -396,8 +396,8 @@ void ref_biquad_cascade_df1_q31(
uint32_t blockSize)
{
q63_t acc; /* accumulator */
uint32_t uShift = ((uint32_t) S->postShift + 1u);
uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
uint32_t uShift = ((uint32_t) S->postShift + 1U);
uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */
q31_t *pIn = pSrc; /* input pointer initialization */
q31_t *pOut = pDst; /* output pointer initialization */
q31_t *pState = S->pState; /* pState pointer initialization */
@ -428,7 +428,7 @@ void ref_biquad_cascade_df1_q31(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -518,7 +518,7 @@ void ref_biquad_cascade_df1_fast_q31(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -597,7 +597,7 @@ void ref_biquad_cascade_df1_fast_q15(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -672,7 +672,7 @@ void ref_biquad_cascade_df1_q15(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;

@ -73,7 +73,7 @@ void ref_conv_q31(
}
/* Store the output in the destination buffer */
pDst[i] = (q31_t)(sum >> 31u);
pDst[i] = (q31_t)(sum >> 31U);
}
}
@ -106,7 +106,7 @@ void ref_conv_fast_q31(
}
/* Store the output in the destination buffer */
pDst[i] = (q31_t)(sum << 1u);
pDst[i] = (q31_t)(sum << 1U);
}
}
@ -166,7 +166,7 @@ void ref_conv_q15(
}
/* Store the output in the destination buffer */
pDst[i] = ref_sat_q15(sum >> 15u);
pDst[i] = ref_sat_q15(sum >> 15U);
}
}
@ -202,7 +202,7 @@ arm_status ref_conv_partial_fast_opt_q15(
}
/* Store the output in the destination buffer */
pDst[i] = ref_sat_q15(sum >> 15u);
pDst[i] = ref_sat_q15(sum >> 15U);
}
return ARM_MATH_SUCCESS;
@ -236,7 +236,7 @@ void ref_conv_fast_q15(
}
/* Store the output in the destination buffer */
pDst[i] = sum >> 15u;
pDst[i] = sum >> 15U;
}
}
@ -270,7 +270,7 @@ void ref_conv_fast_opt_q15(
}
/* Store the output in the destination buffer */
pDst[i] = ref_sat_q15(sum >> 15u);
pDst[i] = ref_sat_q15(sum >> 15U);
}
}

@ -8,11 +8,11 @@ void ref_correlate_f32(
float32_t * pDst)
{
float32_t *pIn1 = pSrcA; /* inputA pointer */
float32_t *pIn2 = pSrcB + (srcBLen - 1u); /* inputB pointer */
float32_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
float32_t sum; /* Accumulator */
uint32_t i = 0u, j; /* loop counters */
uint32_t inv = 0u; /* Reverse order flag */
uint32_t tot = 0u; /* Length */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* The algorithm implementation is based on the lengths of the inputs.
* srcB is always made to slide across srcA.
@ -32,7 +32,7 @@ void ref_correlate_f32(
*/
/* Calculate the length of the remaining sequence */
tot = srcALen + srcBLen - 2u;
tot = srcALen + srcBLen - 2U;
if (srcALen > srcBLen)
{
@ -46,7 +46,7 @@ void ref_correlate_f32(
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + srcALen - 1u;
pIn2 = pSrcA + srcALen - 1U;
/* Initialisation of the pointer after zero padding */
pDst += tot;
@ -61,13 +61,13 @@ void ref_correlate_f32(
}
/* Loop to calculate correlation for output length number of times */
for (i = 0u; i <= tot; i++)
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0.0f;
/* Loop to perform MAC operations according to correlation equation */
for (j = 0u; j <= i; j++)
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((i - j < srcBLen) && (j < srcALen))
@ -92,14 +92,14 @@ void ref_correlate_q31(
q31_t * pDst)
{
q31_t *pIn1 = pSrcA; /* inputA pointer */
q31_t *pIn2 = pSrcB + (srcBLen - 1u); /* inputB pointer */
q31_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q63_t sum; /* Accumulators */
uint32_t i = 0u, j; /* loop counters */
uint32_t inv = 0u; /* Reverse order flag */
uint32_t tot = 0u; /* Length */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2u);
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
@ -116,7 +116,7 @@ void ref_correlate_q31(
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1u);
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
@ -132,13 +132,13 @@ void ref_correlate_q31(
}
/* Loop to calculate correlation for output length number of times */
for (i = 0u; i <= tot; i++)
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to correlation equation */
for (j = 0u; j <= i; j++)
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
@ -149,9 +149,9 @@ void ref_correlate_q31(
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q31_t)(sum >> 31u);
*pDst-- = (q31_t)(sum >> 31U);
else
*pDst++ = (q31_t)(sum >> 31u);
*pDst++ = (q31_t)(sum >> 31U);
}
}
@ -163,14 +163,14 @@ void ref_correlate_fast_q31(
q31_t * pDst)
{
q31_t *pIn1 = pSrcA; /* inputA pointer */
q31_t *pIn2 = pSrcB + (srcBLen - 1u); /* inputB pointer */
q31_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q63_t sum; /* Accumulators */
uint32_t i = 0u, j; /* loop counters */
uint32_t inv = 0u; /* Reverse order flag */
uint32_t tot = 0u; /* Length */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2u);
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
@ -187,7 +187,7 @@ void ref_correlate_fast_q31(
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1u);
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
@ -203,13 +203,13 @@ void ref_correlate_fast_q31(
}
/* Loop to calculate correlation for output length number of times */
for (i = 0u; i <= tot; i++)
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to correlation equation */
for (j = 0u; j <= i; j++)
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
@ -221,9 +221,9 @@ void ref_correlate_fast_q31(
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q31_t)(sum << 1u);
*pDst-- = (q31_t)(sum << 1U);
else
*pDst++ = (q31_t)(sum << 1u);
*pDst++ = (q31_t)(sum << 1U);
}
}
@ -235,14 +235,14 @@ void ref_correlate_q15(
q15_t * pDst)
{
q15_t *pIn1 = pSrcA; /* inputA pointer */
q15_t *pIn2 = pSrcB + (srcBLen - 1u); /* inputB pointer */
q15_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q63_t sum; /* Accumulators */
uint32_t i = 0u, j; /* loop counters */
uint32_t inv = 0u; /* Reverse order flag */
uint32_t tot = 0u; /* Length */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2u);
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
@ -259,7 +259,7 @@ void ref_correlate_q15(
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1u);
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
@ -275,13 +275,13 @@ void ref_correlate_q15(
}
/* Loop to calculate convolution for output length number of times */
for (i = 0u; i <= tot; i++)
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0u; j <= i; j++)
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
@ -292,9 +292,9 @@ void ref_correlate_q15(
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q15_t) ref_sat_q15(sum >> 15u);
*pDst-- = (q15_t) ref_sat_q15(sum >> 15U);
else
*pDst++ = (q15_t) ref_sat_q15(sum >> 15u);
*pDst++ = (q15_t) ref_sat_q15(sum >> 15U);
}
}
@ -306,14 +306,14 @@ void ref_correlate_fast_q15(
q15_t * pDst)
{
q15_t *pIn1 = pSrcA; /* inputA pointer */
q15_t *pIn2 = pSrcB + (srcBLen - 1u); /* inputB pointer */
q15_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q63_t sum; /* Accumulators */
uint32_t i = 0u, j; /* loop counters */
uint32_t inv = 0u; /* Reverse order flag */
uint32_t tot = 0u; /* Length */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2u);
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
@ -330,7 +330,7 @@ void ref_correlate_fast_q15(
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1u);
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
@ -346,13 +346,13 @@ void ref_correlate_fast_q15(
}
/* Loop to calculate convolution for output length number of times */
for (i = 0u; i <= tot; i++)
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0u; j <= i; j++)
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
@ -363,9 +363,9 @@ void ref_correlate_fast_q15(
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q15_t)(sum >> 15u);
*pDst-- = (q15_t)(sum >> 15U);
else
*pDst++ = (q15_t)(sum >> 15u);
*pDst++ = (q15_t)(sum >> 15U);
}
}
@ -378,14 +378,14 @@ void ref_correlate_fast_opt_q15(
q15_t * pScratch)
{
q15_t *pIn1 = pSrcA; /* inputA pointer */
q15_t *pIn2 = pSrcB + (srcBLen - 1u); /* inputB pointer */
q15_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q31_t sum; /* Accumulators */
uint32_t i = 0u, j; /* loop counters */
uint32_t inv = 0u; /* Reverse order flag */
uint32_t tot = 0u; /* Length */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2u);
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
@ -402,7 +402,7 @@ void ref_correlate_fast_opt_q15(
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1u);
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
@ -418,13 +418,13 @@ void ref_correlate_fast_opt_q15(
}
/* Loop to calculate convolution for output length number of times */
for (i = 0u; i <= tot; i++)
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0u; j <= i; j++)
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
@ -435,9 +435,9 @@ void ref_correlate_fast_opt_q15(
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q15_t) ref_sat_q15(sum >> 15u);
*pDst-- = (q15_t) ref_sat_q15(sum >> 15U);
else
*pDst++ = (q15_t) ref_sat_q15(sum >> 15u);
*pDst++ = (q15_t) ref_sat_q15(sum >> 15U);
}
}
@ -449,14 +449,14 @@ void ref_correlate_q7(
q7_t * pDst)
{
q7_t *pIn1 = pSrcA; /* inputA pointer */
q7_t *pIn2 = pSrcB + (srcBLen - 1u); /* inputB pointer */
q7_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */
q31_t sum; /* Accumulator */
uint32_t i = 0u, j; /* loop counters */
uint32_t inv = 0u; /* Reverse order flag */
uint32_t tot = 0u; /* Length */
uint32_t i = 0U, j; /* loop counters */
uint32_t inv = 0U; /* Reverse order flag */
uint32_t tot = 0U; /* Length */
/* Calculate the length of the remaining sequence */
tot = ((srcALen + srcBLen) - 2u);
tot = ((srcALen + srcBLen) - 2U);
if (srcALen > srcBLen)
{
@ -473,7 +473,7 @@ void ref_correlate_q7(
pIn1 = pSrcB;
/* Initialization to the end of inputA pointer */
pIn2 = pSrcA + (srcALen - 1u);
pIn2 = pSrcA + (srcALen - 1U);
/* Initialisation of the pointer after zero padding */
pDst = pDst + tot;
@ -489,13 +489,13 @@ void ref_correlate_q7(
}
/* Loop to calculate convolution for output length number of times */
for (i = 0u; i <= tot; i++)
for (i = 0U; i <= tot; i++)
{
/* Initialize sum with zero to carry on MAC operations */
sum = 0;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0u; j <= i; j++)
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))
@ -506,8 +506,8 @@ void ref_correlate_q7(
}
/* Store the output in the destination buffer */
if (inv == 1)
*pDst-- = (q7_t) __SSAT((sum >> 7u), 8u);
*pDst-- = (q7_t) __SSAT((sum >> 7U), 8U);
else
*pDst++ = (q7_t) __SSAT((sum >> 7u), 8u);
*pDst++ = (q7_t) __SSAT((sum >> 7U), 8U);
}
}

@ -15,9 +15,9 @@ void ref_fir_f32(
/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[(numTaps - 1u)]);
pStateCurnt = &(S->pState[(numTaps - 1U)]);
while (blockSize > 0u)
while (blockSize > 0U)
{
/* Copy one sample at a time into state buffer */
*pStateCurnt++ = *pSrc++;
@ -69,9 +69,9 @@ void ref_fir_q31(
/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[(numTaps - 1u)]);
pStateCurnt = &(S->pState[(numTaps - 1U)]);
while (blockSize > 0u)
while (blockSize > 0U)
{
/* Copy one sample at a time into state buffer */
*pStateCurnt++ = *pSrc++;
@ -123,9 +123,9 @@ void ref_fir_fast_q31(
/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[(numTaps - 1u)]);
pStateCurnt = &(S->pState[(numTaps - 1U)]);
while (blockSize > 0u)
while (blockSize > 0U)
{
/* Copy one sample at a time into state buffer */
*pStateCurnt++ = *pSrc++;
@ -177,9 +177,9 @@ void ref_fir_q15(
/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[(numTaps - 1u)]);
pStateCurnt = &(S->pState[(numTaps - 1U)]);
while (blockSize > 0u)
while (blockSize > 0U)
{
/* Copy one sample at a time into state buffer */
*pStateCurnt++ = *pSrc++;
@ -231,9 +231,9 @@ void ref_fir_fast_q15(
/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[(numTaps - 1u)]);
pStateCurnt = &(S->pState[(numTaps - 1U)]);
while (blockSize > 0u)
while (blockSize > 0U)
{
/* Copy one sample at a time into state buffer */
*pStateCurnt++ = *pSrc++;
@ -285,9 +285,9 @@ void ref_fir_q7(
/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[(numTaps - 1u)]);
pStateCurnt = &(S->pState[(numTaps - 1U)]);
while (blockSize > 0u)
while (blockSize > 0U)
{
/* Copy one sample at a time into state buffer */
*pStateCurnt++ = *pSrc++;

@ -16,12 +16,12 @@ void ref_fir_decimate_f32(
/* S->pState buffer contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = S->pState + numTaps - 1u;
pStateCurnt = S->pState + numTaps - 1U;
/* Total number of output samples to be computed */
blkCnt = blockSize / S->M;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Copy decimation factor number of new input samples into the state buffer */
i = S->M;
@ -64,10 +64,10 @@ void ref_fir_decimate_f32(
pStateCurnt = S->pState;
/* Copy numTaps number of values */
i = numTaps - 1u;
i = numTaps - 1U;
/* copy data */
while (i > 0u)
while (i > 0U)
{
*pStateCurnt++ = *pState++;
@ -92,12 +92,12 @@ void ref_fir_decimate_q31(
/* S->pState buffer contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = S->pState + numTaps - 1u;
pStateCurnt = S->pState + numTaps - 1U;
/* Total number of output samples to be computed */
blkCnt = blockSize / S->M;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Copy decimation factor number of new input samples into the state buffer */
i = S->M;
@ -141,10 +141,10 @@ void ref_fir_decimate_q31(
/* Points to the start of the state buffer */
pStateCurnt = S->pState;
i = numTaps - 1u;
i = numTaps - 1U;
/* copy data */
while (i > 0u)
while (i > 0U)
{
*pStateCurnt++ = *pState++;
@ -169,12 +169,12 @@ void ref_fir_decimate_fast_q31(
/* S->pState buffer contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = S->pState + numTaps - 1u;
pStateCurnt = S->pState + numTaps - 1U;
/* Total number of output samples to be computed */
blkCnt = blockSize / S->M;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Copy decimation factor number of new input samples into the state buffer */
i = S->M;
@ -218,10 +218,10 @@ void ref_fir_decimate_fast_q31(
/* Points to the start of the state buffer */
pStateCurnt = S->pState;
i = numTaps - 1u;
i = numTaps - 1U;
/* copy data */
while (i > 0u)
while (i > 0U)
{
*pStateCurnt++ = *pState++;
@ -246,12 +246,12 @@ void ref_fir_decimate_q15(
/* S->pState buffer contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = S->pState + numTaps - 1u;
pStateCurnt = S->pState + numTaps - 1U;
/* Total number of output samples to be computed */
blkCnt = blockSize / S->M;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Copy decimation factor number of new input samples into the state buffer */
i = S->M;
@ -295,10 +295,10 @@ void ref_fir_decimate_q15(
/* Points to the start of the state buffer */
pStateCurnt = S->pState;
i = numTaps - 1u;
i = numTaps - 1U;
/* copy data */
while (i > 0u)
while (i > 0U)
{
*pStateCurnt++ = *pState++;
@ -323,12 +323,12 @@ void ref_fir_decimate_fast_q15(
/* S->pState buffer contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = S->pState + numTaps - 1u;
pStateCurnt = S->pState + numTaps - 1U;
/* Total number of output samples to be computed */
blkCnt = blockSize / S->M;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Copy decimation factor number of new input samples into the state buffer */
i = S->M;
@ -372,10 +372,10 @@ void ref_fir_decimate_fast_q15(
/* Points to the start of the state buffer */
pStateCurnt = S->pState;
i = numTaps - 1u;
i = numTaps - 1U;
/* copy data */
while (i > 0u)
while (i > 0U)
{
*pStateCurnt++ = *pState++;

@ -23,7 +23,7 @@ void ref_fir_interpolate_f32(
blkCnt = blockSize;
/* Loop over the blockSize. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Copy new input sample into the state buffer */
*pStateCurnt++ = *pSrc++;
@ -31,7 +31,7 @@ void ref_fir_interpolate_f32(
/* Loop over the Interpolation factor. */
i = S->L;
while (i > 0u)
while (i > 0U)
{
/* Set accumulator to zero */
sum = 0.0f;
@ -45,7 +45,7 @@ void ref_fir_interpolate_f32(
/* Loop over the polyPhase length */
tapCnt = phaseLen;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Perform the multiply-accumulate */
sum += *ptr1++ * *ptr2;
@ -79,9 +79,9 @@ void ref_fir_interpolate_f32(
/* Points to the start of the state buffer */
pStateCurnt = S->pState;
tapCnt = phaseLen - 1u;
tapCnt = phaseLen - 1U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
*pStateCurnt++ = *pState++;
@ -118,7 +118,7 @@ void ref_fir_interpolate_q31(
blkCnt = blockSize;
/* Loop over the blockSize. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Copy new input sample into the state buffer */
*pStateCurnt++ = *pSrc++;
@ -126,7 +126,7 @@ void ref_fir_interpolate_q31(
/* Loop over the Interpolation factor. */
i = S->L;
while (i > 0u)
while (i > 0U)
{
/* Set accumulator to zero */
sum = 0;
@ -139,7 +139,7 @@ void ref_fir_interpolate_q31(
tapCnt = phaseLen;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Read the coefficient */
c0 = *(ptr2);
@ -179,10 +179,10 @@ void ref_fir_interpolate_q31(
/* Points to the start of the state buffer */
pStateCurnt = S->pState;
tapCnt = phaseLen - 1u;
tapCnt = phaseLen - 1U;
/* copy data */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
*pStateCurnt++ = *pState++;
@ -216,7 +216,7 @@ void ref_fir_interpolate_q15(
blkCnt = blockSize;
/* Loop over the blockSize. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Copy new input sample into the state buffer */
*pStateCurnt++ = *pSrc++;
@ -224,7 +224,7 @@ void ref_fir_interpolate_q15(
/* Loop over the Interpolation factor. */
i = S->L;
while (i > 0u)
while (i > 0U)
{
/* Set accumulator to zero */
sum = 0;
@ -238,7 +238,7 @@ void ref_fir_interpolate_q15(
/* Loop over the polyPhase length */
tapCnt = (uint32_t)phaseLen;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Read the coefficient */
c0 = *ptr2;
@ -278,9 +278,9 @@ void ref_fir_interpolate_q15(
/* Points to the start of the state buffer */
pStateCurnt = S->pState;
i = (uint32_t) phaseLen - 1u;
i = (uint32_t) phaseLen - 1U;
while (i > 0u)
while (i > 0U)
{
*pStateCurnt++ = *pState++;

@ -18,7 +18,7 @@ void ref_fir_lattice_f32(
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* f0(n) = x(n) */
fcurr = *pSrc++;
@ -45,10 +45,10 @@ void ref_fir_lattice_f32(
for next stage processing */
fcurr = fnext;
stageCnt = (numStages - 1u);
stageCnt = (numStages - 1U);
/* stage loop */
while (stageCnt > 0u)
while (stageCnt > 0U)
{
/* read g2(n) from state buffer */
gcurr = *px;
@ -94,7 +94,7 @@ void ref_fir_lattice_q31(
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* f0(n) = x(n) */
fcurr = *pSrc++;
@ -120,10 +120,10 @@ void ref_fir_lattice_q31(
for next stage processing */
fcurr = fnext;
stageCnt = (numStages - 1u);
stageCnt = (numStages - 1U);
/* stage loop */
while (stageCnt > 0u)
while (stageCnt > 0U)
{
/* read g2(n) from state buffer */
gcurr = *px;
@ -171,7 +171,7 @@ void ref_fir_lattice_q15(
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* f0(n) = x(n) */
fcurnt = *pSrc++;
@ -187,12 +187,12 @@ void ref_fir_lattice_q15(
/* for sample 1 processing */
/* f1(n) = f0(n) + K1 * g0(n-1) */
fnext = ((gcurnt * (*pk)) >> 15u) + fcurnt;
fnext = ((gcurnt * (*pk)) >> 15U) + fcurnt;
fnext = ref_sat_q15(fnext);
/* g1(n) = f0(n) * K1 + g0(n-1) */
gnext = ((fcurnt * (*pk++)) >> 15u) + gcurnt;
gnext = ((fcurnt * (*pk++)) >> 15U) + gcurnt;
gnext = ref_sat_q15(gnext);
/* save f0(n) in state buffer */
@ -202,10 +202,10 @@ void ref_fir_lattice_q15(
for next stage processing */
fcurnt = fnext;
stageCnt = (numStages - 1u);
stageCnt = (numStages - 1U);
/* stage loop */
while (stageCnt > 0u)
while (stageCnt > 0U)
{
/* read g1(n-1) from state buffer */
gcurnt = *px;
@ -215,11 +215,11 @@ void ref_fir_lattice_q15(
/* Sample processing for K2, K3.... */
/* f2(n) = f1(n) + K2 * g1(n-1) */
fnext = ((gcurnt * (*pk)) >> 15u) + fcurnt;
fnext = ((gcurnt * (*pk)) >> 15U) + fcurnt;
fnext = ref_sat_q15(fnext);
/* g2(n) = f1(n) * K2 + g1(n-1) */
gnext = ((fcurnt * (*pk++)) >> 15u) + gcurnt;
gnext = ((fcurnt * (*pk++)) >> 15U) + gcurnt;
gnext = ref_sat_q15(gnext);

@ -52,7 +52,7 @@ void ref_fir_sparse_f32(
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Perform Multiplications and store in destination buffer */
*pOut++ = *px++ * coeff;
@ -62,9 +62,9 @@ void ref_fir_sparse_f32(
}
/* Loop over the number of taps. */
tapCnt = (uint32_t) numTaps - 1u;
tapCnt = (uint32_t) numTaps - 1U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Load the coefficient value and
* increment the coefficient buffer for the next set of state values */
@ -95,7 +95,7 @@ void ref_fir_sparse_f32(
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Perform Multiply-Accumulate */
*pOut++ += *px++ * coeff;
@ -162,7 +162,7 @@ void ref_fir_sparse_q31(
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Perform Multiplications and store in the destination buffer */
*pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32);
@ -172,9 +172,9 @@ void ref_fir_sparse_q31(
}
/* Loop over the number of taps. */
tapCnt = (uint32_t) numTaps - 1u;
tapCnt = (uint32_t) numTaps - 1U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Load the coefficient value and
* increment the coefficient buffer for the next set of state values */
@ -205,7 +205,7 @@ void ref_fir_sparse_q31(
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Perform Multiply-Accumulate */
out = *pOut;
@ -226,7 +226,7 @@ void ref_fir_sparse_q31(
/* Output is converted into 1.31 format. */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
in = *pOut << 1;
*pOut++ = in;
@ -290,7 +290,7 @@ void ref_fir_sparse_q15(
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Perform multiplication and store in the scratch buffer */
*pScratchOut++ = ((q31_t) * px++ * coeff);
@ -300,9 +300,9 @@ void ref_fir_sparse_q15(
}
/* Loop over the number of taps. */
tapCnt = (uint32_t) numTaps - 1u;
tapCnt = (uint32_t) numTaps - 1U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Load the coefficient value and
* increment the coefficient buffer for the next set of state values */
@ -332,7 +332,7 @@ void ref_fir_sparse_q15(
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Perform Multiply-Accumulate */
*pScratchOut++ += (q31_t) * px++ * coeff;
@ -350,7 +350,7 @@ void ref_fir_sparse_q15(
/* Loop over the blockSize. */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
*pOut++ = (q15_t) __SSAT(*pScr2++ >> 15, 16);
blkCnt--;
@ -413,7 +413,7 @@ void ref_fir_sparse_q7(
/* Loop over the blockSize */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Perform multiplication and store in the scratch buffer */
*pScratchOut++ = ((q31_t) * px++ * coeff);
@ -423,9 +423,9 @@ void ref_fir_sparse_q7(
}
/* Loop over the number of taps. */
tapCnt = (uint32_t) numTaps - 1u;
tapCnt = (uint32_t) numTaps - 1U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Load the coefficient value and
* increment the coefficient buffer for the next set of state values */
@ -456,7 +456,7 @@ void ref_fir_sparse_q7(
/* Loop over the blockSize */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Perform Multiply-Accumulate */
in = *pScratchOut + ((q31_t) * px++ * coeff);
@ -475,7 +475,7 @@ void ref_fir_sparse_q7(
/* Loop over the blockSize. */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
*pOut++ = (q7_t) __SSAT(*pScr2++ >> 7, 8);

@ -18,7 +18,7 @@ void ref_iir_lattice_f32(
pState = &S->pState[0];
/* Sample processing */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Read Sample from input buffer */
/* fN(n) = x(n) */
@ -38,7 +38,7 @@ void ref_iir_lattice_f32(
/* Process sample for numStages */
tapCnt = numStages;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
gcurr = *px1++;
/* Process sample for last taps */
@ -63,7 +63,7 @@ void ref_iir_lattice_f32(
*pDst++ = acc;
/* Advance the state pointer by 1 to process the next group of samples */
pState = pState + 1u;
pState = pState + 1U;
blkCnt--;
}
@ -77,7 +77,7 @@ void ref_iir_lattice_f32(
tapCnt = numStages;
/* Copy the data */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
*pStateCurnt++ = *pState++;
@ -104,7 +104,7 @@ void ref_iir_lattice_q31(
pState = &S->pState[0];
/* Sample processing */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Read Sample from input buffer */
/* fN(n) = x(n) */
@ -123,7 +123,7 @@ void ref_iir_lattice_q31(
tapCnt = numStages;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
gcurr = *px1++;
/* Process sample */
@ -152,10 +152,10 @@ void ref_iir_lattice_q31(
*px2++ = fnext;
/* write out into pDst */
*pDst++ = (q31_t) (acc >> 31u);
*pDst++ = (q31_t) (acc >> 31U);
/* Advance the state pointer by 1 to process the next group of samples */
pState = pState + 1u;
pState = pState + 1U;
blkCnt--;
}
@ -169,7 +169,7 @@ void ref_iir_lattice_q31(
tapCnt = numStages;
/* Copy the remaining q31_t data */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
*pStateCurnt++ = *pState++;
@ -198,7 +198,7 @@ void ref_iir_lattice_q15(
pState = &S->pState[0];
/* Sample processing */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Read Sample from input buffer */
/* fN(n) = x(n) */
@ -217,7 +217,7 @@ void ref_iir_lattice_q15(
tapCnt = numStages;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
gcurr = *px1++;
/* Process sample */
@ -248,7 +248,7 @@ void ref_iir_lattice_q15(
*pDst++ = out;
/* Advance the state pointer by 1 to process the next group of samples */
pState = pState + 1u;
pState = pState + 1U;
blkCnt--;
}
@ -261,7 +261,7 @@ void ref_iir_lattice_q15(
stgCnt = numStages;
/* copy data */
while (stgCnt > 0u)
while (stgCnt > 0U)
{
*pStateCurnt++ = *pState++;

@ -22,11 +22,11 @@ void ref_lms_f32(
/* S->pState points to state array which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[numTaps - 1u]);
pStateCurnt = &(S->pState[numTaps - 1U]);
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Copy the new input sample into the state buffer */
*pStateCurnt++ = *pSrc++;
@ -99,9 +99,9 @@ void ref_lms_norm_f32(
/* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[numTaps - 1u]);
pStateCurnt = &(S->pState[numTaps - 1U]);
for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
for(blkCnt = blockSize; blkCnt > 0U; blkCnt--)
{
/* Copy the new input sample into the state buffer */
*pStateCurnt++ = *pSrc;
@ -179,13 +179,13 @@ void ref_lms_q31(
q31_t coef; /* Temporary variable for coef */
q31_t acc_l, acc_h; /* temporary input */
uint32_t uShift = (uint32_t)S->postShift + 1;
uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */
/* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[(numTaps - 1u)]);
pStateCurnt = &(S->pState[(numTaps - 1U)]);
for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
for(blkCnt = blockSize; blkCnt > 0U; blkCnt--)
{
/* Copy the new input sample into the state buffer */
*pStateCurnt++ = *pSrc++;
@ -202,7 +202,7 @@ void ref_lms_q31(
/* Loop over numTaps number of values */
tapCnt = numTaps;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Perform the multiply-accumulate */
acc += (q63_t)(*px++) * (*pb++);
@ -241,7 +241,7 @@ void ref_lms_q31(
/* Loop over numTaps number of values */
tapCnt = numTaps;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Perform the multiply-accumulate */
coef = (q31_t)(((q63_t) alpha * (*px++)) >> 32);
@ -260,11 +260,11 @@ void ref_lms_q31(
/* Points to the start of the pState buffer */
pStateCurnt = S->pState;
/* Copy (numTaps - 1u) samples */
/* Copy (numTaps - 1U) samples */
tapCnt = numTaps - 1;
/* Copy the data */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
*pStateCurnt++ = *pState++;
@ -296,17 +296,17 @@ void ref_lms_norm_q31(
q63_t errorXmu; /* Temporary variables to store error and mu product and reciprocal of energy */
q31_t coef; /* Temporary variable for coef */
q31_t acc_l, acc_h; /* temporary input */
uint32_t uShift = ((uint32_t) S->postShift + 1u);
uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
uint32_t uShift = ((uint32_t) S->postShift + 1U);
uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */
energy = S->energy;
x0 = S->x0;
/* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[(numTaps - 1u)]);
pStateCurnt = &(S->pState[(numTaps - 1U)]);
for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
for(blkCnt = blockSize; blkCnt > 0U; blkCnt--)
{
/* Copy the new input sample into the state buffer */
@ -331,7 +331,7 @@ void ref_lms_norm_q31(
/* Loop over numTaps number of values */
tapCnt = numTaps;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Perform the multiply-accumulate */
acc += ((q63_t) (*px++)) * (*pb++);
@ -372,13 +372,13 @@ void ref_lms_norm_q31(
/* Loop over numTaps number of values */
tapCnt = numTaps;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Perform the multiply-accumulate */
/* coef is in 2.30 format */
coef = (q31_t)(((q63_t)w * (*px++)) >> 32);
/* get coef in 1.31 format by left shifting */
*pb = ref_sat_q31((q63_t)*pb + (coef << 1u));
*pb = ref_sat_q31((q63_t)*pb + (coef << 1U));
/* update coefficient buffer to next coefficient */
pb++;
@ -404,11 +404,11 @@ void ref_lms_norm_q31(
/* Points to the start of the pState buffer */
pStateCurnt = S->pState;
/* Loop for (numTaps - 1u) samples copy */
/* Loop for (numTaps - 1U) samples copy */
tapCnt = numTaps - 1;
/* Copy the remaining q31_t data */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
*pStateCurnt++ = *pState++;
@ -443,9 +443,9 @@ void ref_lms_q15(
/* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[(numTaps - 1u)]);
pStateCurnt = &(S->pState[(numTaps - 1U)]);
for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
for(blkCnt = blockSize; blkCnt > 0U; blkCnt--)
{
/* Copy the new input sample into the state buffer */
*pStateCurnt++ = *pSrc++;
@ -462,7 +462,7 @@ void ref_lms_q15(
/* Loop over numTaps number of values */
tapCnt = numTaps;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Perform the multiply-accumulate */
acc += (q63_t)((q31_t)(*px++) * (*pb++));
@ -504,7 +504,7 @@ void ref_lms_q15(
/* Loop over numTaps number of values */
tapCnt = numTaps;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Perform the multiply-accumulate */
coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
@ -522,11 +522,11 @@ void ref_lms_q15(
/* Points to the start of the pState buffer */
pStateCurnt = S->pState;
/* Copy (numTaps - 1u) samples */
/* Copy (numTaps - 1U) samples */
tapCnt = numTaps - 1;
/* Copy the data */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
*pStateCurnt++ = *pState++;
@ -568,9 +568,9 @@ void ref_lms_norm_q15(
/* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
/* pStateCurnt points to the location where the new input data should be written */
pStateCurnt = &(S->pState[(numTaps - 1u)]);
pStateCurnt = &(S->pState[(numTaps - 1U)]);
for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
for(blkCnt = blockSize; blkCnt > 0U; blkCnt--)
{
/* Copy the new input sample into the state buffer */
*pStateCurnt++ = *pSrc;
@ -594,7 +594,7 @@ void ref_lms_norm_q15(
/* Loop over numTaps number of values */
tapCnt = numTaps;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Perform the multiply-accumulate */
acc += (q31_t)*px++ * (*pb++);
@ -653,7 +653,7 @@ void ref_lms_norm_q15(
/* Loop over numTaps number of values */
tapCnt = numTaps;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Perform the multiply-accumulate */
coef = *pb + (((q31_t)w * (*px++)) >> 15);
@ -667,7 +667,7 @@ void ref_lms_norm_q15(
x0 = *pState;
/* Advance state pointer by 1 for the next sample */
pState = pState + 1u;
pState = pState + 1U;
}
/* Save energy and x0 values for the next frame */
@ -681,11 +681,11 @@ void ref_lms_norm_q15(
/* Points to the start of the pState buffer */
pStateCurnt = S->pState;
/* copy (numTaps - 1u) data */
/* copy (numTaps - 1U) data */
tapCnt = numTaps - 1;
/* copy data */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
*pStateCurnt++ = *pState++;

@ -590,8 +590,8 @@ extern "C"
while ((data & mask) == 0)
{
count += 1u;
mask = mask >> 1u;
count += 1U;
mask = mask >> 1U;
}
return (count);
@ -633,7 +633,7 @@ extern "C"
/* calculation of reciprocal value */
/* running approximation for two iterations */
for (i = 0u; i < 2u; i++)
for (i = 0U; i < 2U; i++)
{
tempVal = (uint32_t) (((q63_t) in * out) >> 31);
tempVal = 0x7FFFFFFFu - tempVal;
@ -646,7 +646,7 @@ extern "C"
*dst = out;
/* return num of signbits of out = 1/in value */
return (signBits + 1u);
return (signBits + 1U);
}
@ -684,7 +684,7 @@ extern "C"
/* calculation of reciprocal value */
/* running approximation for two iterations */
for (i = 0u; i < 2u; i++)
for (i = 0U; i < 2U; i++)
{
tempVal = (uint32_t) (((q31_t) in * out) >> 15);
tempVal = 0x7FFFu - tempVal;
@ -4924,7 +4924,7 @@ void arm_rfft_fast_f32(
acc += (q63_t) S->A2 * S->state[1];
/* convert output to 1.31 format to add y[n-1] */
out = (q31_t) (acc >> 31u);
out = (q31_t) (acc >> 31U);
/* out += y[n-1] */
out += S->state[2];
@ -5608,7 +5608,7 @@ void arm_rfft_fast_f32(
y += ((q31_t) (((q63_t) y1 * fract) >> 32));
/* Convert y to 1.31 format */
return (y << 1u);
return (y << 1U);
}
}
@ -5892,7 +5892,7 @@ void arm_rfft_fast_f32(
int32_t srcInc,
uint32_t blockSize)
{
uint32_t i = 0u;
uint32_t i = 0U;
int32_t wOffset;
/* Copy the value of Index pointer that points
@ -5902,7 +5902,7 @@ void arm_rfft_fast_f32(
/* Loop over the blockSize */
i = blockSize;
while (i > 0u)
while (i > 0U)
{
/* copy the input sample to the circular buffer */
circBuffer[wOffset] = *src;
@ -5939,7 +5939,7 @@ void arm_rfft_fast_f32(
int32_t dstInc,
uint32_t blockSize)
{
uint32_t i = 0u;
uint32_t i = 0U;
int32_t rOffset, dst_end;
/* Copy the value of Index pointer that points
@ -5950,7 +5950,7 @@ void arm_rfft_fast_f32(
/* Loop over the blockSize */
i = blockSize;
while (i > 0u)
while (i > 0U)
{
/* copy the sample from the circular buffer to the destination buffer */
*dst = circBuffer[rOffset];
@ -5992,7 +5992,7 @@ void arm_rfft_fast_f32(
int32_t srcInc,
uint32_t blockSize)
{
uint32_t i = 0u;
uint32_t i = 0U;
int32_t wOffset;
/* Copy the value of Index pointer that points
@ -6002,7 +6002,7 @@ void arm_rfft_fast_f32(
/* Loop over the blockSize */
i = blockSize;
while (i > 0u)
while (i > 0U)
{
/* copy the input sample to the circular buffer */
circBuffer[wOffset] = *src;
@ -6050,7 +6050,7 @@ void arm_rfft_fast_f32(
/* Loop over the blockSize */
i = blockSize;
while (i > 0u)
while (i > 0U)
{
/* copy the sample from the circular buffer to the destination buffer */
*dst = circBuffer[rOffset];
@ -6092,7 +6092,7 @@ void arm_rfft_fast_f32(
int32_t srcInc,
uint32_t blockSize)
{
uint32_t i = 0u;
uint32_t i = 0U;
int32_t wOffset;
/* Copy the value of Index pointer that points
@ -6102,7 +6102,7 @@ void arm_rfft_fast_f32(
/* Loop over the blockSize */
i = blockSize;
while (i > 0u)
while (i > 0U)
{
/* copy the input sample to the circular buffer */
circBuffer[wOffset] = *src;
@ -6150,7 +6150,7 @@ void arm_rfft_fast_f32(
/* Loop over the blockSize */
i = blockSize;
while (i > 0u)
while (i > 0U)
{
/* copy the sample from the circular buffer to the destination buffer */
*dst = circBuffer[rOffset];
@ -6928,7 +6928,7 @@ void arm_rfft_fast_f32(
/* 20 bits for the fractional part */
/* shift left xfract by 11 to keep 1.31 format */
xfract = (X & 0x000FFFFF) << 11u;
xfract = (X & 0x000FFFFF) << 11U;
/* Read two nearest output values from the index */
x1 = pYData[(rI) + (int32_t)nCols * (cI) ];
@ -6936,7 +6936,7 @@ void arm_rfft_fast_f32(
/* 20 bits for the fractional part */
/* shift left yfract by 11 to keep 1.31 format */
yfract = (Y & 0x000FFFFF) << 11u;
yfract = (Y & 0x000FFFFF) << 11U;
/* Read two nearest output values from the index */
y1 = pYData[(rI) + (int32_t)nCols * (cI + 1) ];
@ -7020,19 +7020,19 @@ void arm_rfft_fast_f32(
/* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
/* convert 13.35 to 13.31 by right shifting and out is in 1.31 */
out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4U);
acc = ((q63_t) out * (0xFFFFF - yfract));
/* x2 * (xfract) * (1-yfract) in 1.51 and adding to acc */
out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4U);
acc += ((q63_t) out * (xfract));
/* y1 * (1 - xfract) * (yfract) in 1.51 and adding to acc */
out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4U);
acc += ((q63_t) out * (yfract));
/* y2 * (xfract) * (yfract) in 1.51 and adding to acc */
out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
out = (q31_t) (((q63_t) y2 * (xfract)) >> 4U);
acc += ((q63_t) out * (yfract));
/* acc is in 13.51 format and down shift acc by 36 times */

@ -73,11 +73,11 @@ void arm_abs_f32(
float32_t in1, in2, in3, in4; /* temporary variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute and then store the results in the destination buffer. */
@ -115,10 +115,10 @@ void arm_abs_f32(
/* Update source pointer to process next samples */
pSrc += 4u;
pSrc += 4U;
/* Update destination pointer to process next samples */
pDst += 4u;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -126,7 +126,7 @@ void arm_abs_f32(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -137,7 +137,7 @@ void arm_abs_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute and then store the results in the destination buffer. */

@ -67,12 +67,12 @@ void arm_abs_q15(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
simd = __SIMD32_CONST(pDst);
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = |A| */
/* Read two inputs */
@ -121,9 +121,9 @@ void arm_abs_q15(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = |A| */
/* Read the input */
@ -145,7 +145,7 @@ void arm_abs_q15(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = |A| */
/* Read the input */

@ -65,11 +65,11 @@ void arm_abs_q31(
q31_t in1, in2, in3, in4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
@ -89,7 +89,7 @@ void arm_abs_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -100,7 +100,7 @@ void arm_abs_q31(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = |A| */
/* Calculate absolute value of the input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */

@ -69,11 +69,11 @@ void arm_abs_q7(
q31_t out1, out2, out3, out4; /* temporary output variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = |A| */
/* Read inputs */
@ -109,8 +109,8 @@ void arm_abs_q7(
*(pDst + 3) = (q7_t) out4;
/* update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
pSrc += 4U;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -118,7 +118,7 @@ void arm_abs_q7(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
/* Run the below code for Cortex-M0 */
@ -126,7 +126,7 @@ void arm_abs_q7(
#endif /* #define ARM_MATH_CM0_FAMILY */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = |A| */
/* Read the input */

@ -73,11 +73,11 @@ void arm_add_f32(
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
@ -100,9 +100,9 @@ void arm_add_f32(
*(pDst + 3) = inA4 + inB4;
/* update pointers to process next samples */
pSrcA += 4u;
pSrcB += 4u;
pDst += 4u;
pSrcA += 4U;
pSrcB += 4U;
pDst += 4U;
/* Decrement the loop counter */
@ -111,7 +111,7 @@ void arm_add_f32(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -122,7 +122,7 @@ void arm_add_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */

@ -65,11 +65,11 @@ void arm_add_q15(
q31_t inA1, inA2, inB1, inB2;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
@ -87,9 +87,9 @@ void arm_add_q15(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
@ -108,7 +108,7 @@ void arm_add_q15(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */

@ -67,11 +67,11 @@ void arm_add_q31(
q31_t inB1, inB2, inB3, inB4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
@ -96,9 +96,9 @@ void arm_add_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
@ -117,7 +117,7 @@ void arm_add_q31(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */

@ -65,11 +65,11 @@ void arm_add_q7(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
@ -81,9 +81,9 @@ void arm_add_q7(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */
@ -102,7 +102,7 @@ void arm_add_q7(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + B */
/* Add and then store the results in the destination buffer. */

@ -74,11 +74,11 @@ void arm_dot_prod_f32(
/* Run the below code for Cortex-M4 and Cortex-M3 */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer */
@ -93,7 +93,7 @@ void arm_dot_prod_f32(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -105,7 +105,7 @@ void arm_dot_prod_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */

@ -69,11 +69,11 @@ void arm_dot_prod_q15(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */
@ -86,9 +86,9 @@ void arm_dot_prod_q15(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the results in a temporary buffer. */
@ -106,7 +106,7 @@ void arm_dot_prod_q15(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the results in a temporary buffer. */

@ -72,11 +72,11 @@ void arm_dot_prod_q31(
q31_t inB1, inB2, inB3, inB4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */
@ -89,10 +89,10 @@ void arm_dot_prod_q31(
inB3 = *pSrcB++;
inB4 = *pSrcB++;
sum += ((q63_t) inA1 * inB1) >> 14u;
sum += ((q63_t) inA2 * inB2) >> 14u;
sum += ((q63_t) inA3 * inB3) >> 14u;
sum += ((q63_t) inA4 * inB4) >> 14u;
sum += ((q63_t) inA1 * inB1) >> 14U;
sum += ((q63_t) inA2 * inB2) >> 14U;
sum += ((q63_t) inA3 * inB3) >> 14U;
sum += ((q63_t) inA4 * inB4) >> 14U;
/* Decrement the loop counter */
blkCnt--;
@ -100,7 +100,7 @@ void arm_dot_prod_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -112,11 +112,11 @@ void arm_dot_prod_q31(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */
sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;
sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14U;
/* Decrement the loop counter */
blkCnt--;

@ -74,11 +74,11 @@ void arm_dot_prod_q7(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* read 4 samples at a time from sourceA */
input1 = *__SIMD32(pSrcA)++;
@ -104,9 +104,9 @@ void arm_dot_prod_q7(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Dot product and then store the results in a temporary buffer. */
@ -125,7 +125,7 @@ void arm_dot_prod_q7(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
/* Dot product and then store the results in a temporary buffer. */

@ -73,11 +73,11 @@ void arm_mult_f32(
float32_t out1, out2, out3, out4; /* temporary output variables */
/* loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply the inputs and store the results in output buffer */
@ -125,9 +125,9 @@ void arm_mult_f32(
/* update pointers to process next samples */
pSrcA += 4u;
pSrcB += 4u;
pDst += 4u;
pSrcA += 4U;
pSrcB += 4U;
pDst += 4U;
/* Decrement the blockSize loop counter */
blkCnt--;
@ -135,7 +135,7 @@ void arm_mult_f32(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -146,7 +146,7 @@ void arm_mult_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply the inputs and store the results in output buffer */

@ -68,11 +68,11 @@ void arm_mult_q15(
q31_t mul1, mul2, mul3, mul4; /* temporary variables */
/* loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* read two samples at a time from sourceA */
inA1 = *__SIMD32(pSrcA)++;
@ -114,7 +114,7 @@ void arm_mult_q15(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -126,7 +126,7 @@ void arm_mult_q15(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply the inputs and store the result in the destination buffer */

@ -67,11 +67,11 @@ void arm_mult_q31(
q31_t out1, out2, out3, out4; /* temporary output variables */
/* loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply the inputs and then store the results in the destination buffer. */
@ -94,10 +94,10 @@ void arm_mult_q31(
out3 = __SSAT(out3, 31);
out4 = __SSAT(out4, 31);
*pDst++ = out1 << 1u;
*pDst++ = out2 << 1u;
*pDst++ = out3 << 1u;
*pDst++ = out4 << 1u;
*pDst++ = out1 << 1U;
*pDst++ = out2 << 1U;
*pDst++ = out3 << 1U;
*pDst++ = out4 << 1U;
/* Decrement the blockSize loop counter */
blkCnt--;
@ -105,9 +105,9 @@ void arm_mult_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply the inputs and then store the results in the destination buffer. */
@ -115,7 +115,7 @@ void arm_mult_q31(
inB1 = *pSrcB++;
out1 = ((q63_t) inA1 * inB1) >> 32;
out1 = __SSAT(out1, 31);
*pDst++ = out1 << 1u;
*pDst++ = out1 << 1U;
/* Decrement the blockSize loop counter */
blkCnt--;
@ -129,7 +129,7 @@ void arm_mult_q31(
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply the inputs and then store the results in the destination buffer. */

@ -65,11 +65,11 @@ void arm_mult_q7(
q7_t out1, out2, out3, out4; /* Temporary variables to store the product */
/* loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply the inputs and store the results in temporary variables */
@ -87,7 +87,7 @@ void arm_mult_q7(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -99,7 +99,7 @@ void arm_mult_q7(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * B */
/* Multiply the inputs and store the result in the destination buffer */

@ -73,11 +73,11 @@ void arm_negate_f32(
float32_t in1, in2, in3, in4; /* temporary variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* read inputs from source */
in1 = *pSrc;
@ -98,8 +98,8 @@ void arm_negate_f32(
*(pDst + 3) = in4;
/* update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
pSrc += 4U;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -107,7 +107,7 @@ void arm_negate_f32(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -118,7 +118,7 @@ void arm_negate_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and then store the results in the destination buffer. */

@ -70,11 +70,11 @@ void arm_negate_q15(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = -A */
/* Read two inputs at a time */
@ -94,8 +94,8 @@ void arm_negate_q15(
/* update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
pSrc += 4U;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -103,7 +103,7 @@ void arm_negate_q15(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -114,7 +114,7 @@ void arm_negate_q15(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and then store the result in the destination buffer. */

@ -64,11 +64,11 @@ void arm_negate_q31(
q31_t in1, in2, in3, in4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and then store the results in the destination buffer. */
@ -88,7 +88,7 @@ void arm_negate_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -100,7 +100,7 @@ void arm_negate_q31(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and then store the result in the destination buffer. */

@ -66,11 +66,11 @@ void arm_negate_q7(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = -A */
/* Read four inputs */
@ -85,7 +85,7 @@ void arm_negate_q7(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -96,7 +96,7 @@ void arm_negate_q7(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = -A */
/* Negate and then store the results in the destination buffer. */ \

@ -75,11 +75,11 @@ void arm_offset_f32(
float32_t in1, in2, in3, in4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer. */
@ -118,8 +118,8 @@ void arm_offset_f32(
*(pDst + 3) = in4;
/* update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
pSrc += 4U;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -127,7 +127,7 @@ void arm_offset_f32(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -138,7 +138,7 @@ void arm_offset_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the result in the destination buffer. */

@ -66,14 +66,14 @@ void arm_offset_q15(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
offset_packed = __PKHBT(offset, offset, 16);
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer, 2 samples at a time. */
@ -86,9 +86,9 @@ void arm_offset_q15(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer. */
@ -105,7 +105,7 @@ void arm_offset_q15(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer. */

@ -66,11 +66,11 @@ void arm_offset_q31(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer. */
@ -90,9 +90,9 @@ void arm_offset_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the result in the destination buffer. */
@ -109,7 +109,7 @@ void arm_offset_q31(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the result in the destination buffer. */

@ -66,14 +66,14 @@ void arm_offset_q7(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
offset_packed = __PACKq7(offset, offset, offset, offset);
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the results in the destination buffer for 4 samples at a time. */
@ -85,9 +85,9 @@ void arm_offset_q7(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the result in the destination buffer. */
@ -104,7 +104,7 @@ void arm_offset_q7(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A + offset */
/* Add offset and then store the result in the destination buffer. */

@ -87,11 +87,11 @@ void arm_scale_f32(
float32_t in1, in2, in3, in4; /* temporary variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale the input and then store the results in the destination buffer. */
@ -121,8 +121,8 @@ void arm_scale_f32(
*(pDst + 3) = in4;
/* update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
pSrc += 4U;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -130,7 +130,7 @@ void arm_scale_f32(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -141,7 +141,7 @@ void arm_scale_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */

@ -72,11 +72,11 @@ void arm_scale_q15(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Reading 2 inputs from memory */
inA1 = *__SIMD32(pSrc)++;
@ -112,9 +112,9 @@ void arm_scale_q15(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */
@ -131,7 +131,7 @@ void arm_scale_q15(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */

@ -73,13 +73,13 @@ void arm_scale_q31(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
if (sign == 0u)
if (sign == 0U)
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* read four inputs from source */
in1 = *pSrc;
@ -121,8 +121,8 @@ void arm_scale_q31(
*(pDst + 3) = out4;
/* Update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
pSrc += 4U;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -133,7 +133,7 @@ void arm_scale_q31(
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* read four inputs from source */
in1 = *pSrc;
@ -162,8 +162,8 @@ void arm_scale_q31(
*(pDst + 3) = out4;
/* Update pointers to process next samples */
pSrc += 4u;
pDst += 4u;
pSrc += 4U;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -171,7 +171,7 @@ void arm_scale_q31(
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -184,7 +184,7 @@ void arm_scale_q31(
if (sign == 0)
{
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */
@ -204,7 +204,7 @@ void arm_scale_q31(
}
else
{
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */

@ -69,12 +69,12 @@ void arm_scale_q7(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Reading 4 inputs from memory */
in1 = *pSrc++;
@ -99,9 +99,9 @@ void arm_scale_q7(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */
@ -118,7 +118,7 @@ void arm_scale_q7(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A * scale */
/* Scale the input and then store the result in the destination buffer. */

@ -68,17 +68,17 @@ void arm_shift_q15(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* Getting the sign of shiftBits */
sign = (shiftBits & 0x80);
/* If the shift value is positive then do left shift else right shift */
if (sign == 0u)
if (sign == 0U)
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Read 2 inputs */
in1 = *pSrc++;
@ -118,9 +118,9 @@ void arm_shift_q15(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift and then store the results in the destination buffer. */
@ -134,7 +134,7 @@ void arm_shift_q15(
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Read 2 inputs */
in1 = *pSrc++;
@ -175,9 +175,9 @@ void arm_shift_q15(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift the inputs and then store the results in the destination buffer. */
@ -196,12 +196,12 @@ void arm_shift_q15(
sign = (shiftBits & 0x80);
/* If the shift value is positive then do left shift else right shift */
if (sign == 0u)
if (sign == 0U)
{
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift and then store the results in the destination buffer. */
@ -216,7 +216,7 @@ void arm_shift_q15(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift the inputs and then store the results in the destination buffer. */

@ -84,14 +84,14 @@ void arm_shift_q31(
q31_t out1, out2, out3, out4; /* Temporary output variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
if (sign == 0u)
if (sign == 0U)
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift the input and then store the results in the destination buffer. */
@ -122,8 +122,8 @@ void arm_shift_q31(
*(pDst + 3) = out4;
/* Update destination pointer to process next samples */
pSrc += 4u;
pDst += 4u;
pSrc += 4U;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -134,7 +134,7 @@ void arm_shift_q31(
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift the input and then store the results in the destination buffer. */
@ -149,8 +149,8 @@ void arm_shift_q31(
*(pDst + 3) = (in4 >> -shiftBits);
pSrc += 4u;
pDst += 4u;
pSrc += 4U;
pDst += 4U;
blkCnt--;
}
@ -159,7 +159,7 @@ void arm_shift_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -172,11 +172,11 @@ void arm_shift_q31(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A (>> or <<) shiftBits */
/* Shift the input and then store the result in the destination buffer. */
*pDst++ = (sign == 0u) ? clip_q63_to_q31((q63_t) * pSrc++ << shiftBits) :
*pDst++ = (sign == 0U) ? clip_q63_to_q31((q63_t) * pSrc++ << shiftBits) :
(*pSrc++ >> -shiftBits);
/* Decrement the loop counter */

@ -75,17 +75,17 @@ void arm_shift_q7(
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* Getting the sign of shiftBits */
sign = (shiftBits & 0x80);
/* If the shift value is positive then do left shift else right shift */
if (sign == 0u)
if (sign == 0U)
{
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Read 4 inputs */
@ -100,7 +100,7 @@ void arm_shift_q7(
__SSAT((in3 << shiftBits), 8),
__SSAT((in4 << shiftBits), 8));
/* Update source pointer to process next samples */
pSrc += 4u;
pSrc += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -108,9 +108,9 @@ void arm_shift_q7(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift the input and then store the result in the destination buffer. */
@ -125,7 +125,7 @@ void arm_shift_q7(
shiftBits = -shiftBits;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Read 4 inputs */
@ -139,7 +139,7 @@ void arm_shift_q7(
(in3 >> shiftBits), (in4 >> shiftBits));
pSrc += 4u;
pSrc += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -147,9 +147,9 @@ void arm_shift_q7(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift the input and then store the result in the destination buffer. */
@ -169,12 +169,12 @@ void arm_shift_q7(
sign = (shiftBits & 0x80);
/* If the shift value is positive then do left shift else right shift */
if (sign == 0u)
if (sign == 0U)
{
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A << shiftBits */
/* Shift the input and then store the result in the destination buffer. */
@ -189,7 +189,7 @@ void arm_shift_q7(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A >> shiftBits */
/* Shift the input and then store the result in the destination buffer. */

@ -74,11 +74,11 @@ void arm_sub_f32(
float32_t inB1, inB2, inB3, inB4; /* temporary variables */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the results in the destination buffer. */
@ -101,9 +101,9 @@ void arm_sub_f32(
/* Update pointers to process next samples */
pSrcA += 4u;
pSrcB += 4u;
pDst += 4u;
pSrcA += 4U;
pSrcB += 4U;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -111,7 +111,7 @@ void arm_sub_f32(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
#else
@ -122,7 +122,7 @@ void arm_sub_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the results in the destination buffer. */

@ -67,11 +67,11 @@ void arm_sub_q15(
q31_t inB1, inB2;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the results in the destination buffer two samples at a time. */
@ -89,9 +89,9 @@ void arm_sub_q15(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the result in the destination buffer. */
@ -108,7 +108,7 @@ void arm_sub_q15(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the result in the destination buffer. */

@ -67,11 +67,11 @@ void arm_sub_q31(
q31_t inB1, inB2, inB3, inB4;
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the results in the destination buffer. */
@ -96,9 +96,9 @@ void arm_sub_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the result in the destination buffer. */
@ -115,7 +115,7 @@ void arm_sub_q31(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the result in the destination buffer. */

@ -64,11 +64,11 @@ void arm_sub_q7(
/* Run the below code for Cortex-M4 and Cortex-M3 */
/*loop Unrolling */
blkCnt = blockSize >> 2u;
blkCnt = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the results in the destination buffer 4 samples at a time. */
@ -80,9 +80,9 @@ void arm_sub_q7(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize % 0x4u;
blkCnt = blockSize % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the result in the destination buffer. */
@ -99,7 +99,7 @@ void arm_sub_q7(
/* Initialize blkCnt with number of samples */
blkCnt = blockSize;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C = A - B */
/* Subtract and then store the result in the destination buffer. */

@ -143,49 +143,49 @@ const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096 = {
/* Floating-point structs */
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len32 = {
{ 16, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_16_TABLE_LENGTH },
32u,
32U,
(float32_t *)twiddleCoef_rfft_32
};
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len64 = {
{ 32, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_32_TABLE_LENGTH },
64u,
64U,
(float32_t *)twiddleCoef_rfft_64
};
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len128 = {
{ 64, twiddleCoef_64, armBitRevIndexTable64, ARMBITREVINDEXTABLE_64_TABLE_LENGTH },
128u,
128U,
(float32_t *)twiddleCoef_rfft_128
};
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len256 = {
{ 128, twiddleCoef_128, armBitRevIndexTable128, ARMBITREVINDEXTABLE_128_TABLE_LENGTH },
256u,
256U,
(float32_t *)twiddleCoef_rfft_256
};
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len512 = {
{ 256, twiddleCoef_256, armBitRevIndexTable256, ARMBITREVINDEXTABLE_256_TABLE_LENGTH },
512u,
512U,
(float32_t *)twiddleCoef_rfft_512
};
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len1024 = {
{ 512, twiddleCoef_512, armBitRevIndexTable512, ARMBITREVINDEXTABLE_512_TABLE_LENGTH },
1024u,
1024U,
(float32_t *)twiddleCoef_rfft_1024
};
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len2048 = {
{ 1024, twiddleCoef_1024, armBitRevIndexTable1024, ARMBITREVINDEXTABLE_1024_TABLE_LENGTH },
2048u,
2048U,
(float32_t *)twiddleCoef_rfft_2048
};
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len4096 = {
{ 2048, twiddleCoef_2048, armBitRevIndexTable2048, ARMBITREVINDEXTABLE_2048_TABLE_LENGTH },
4096u,
4096U,
(float32_t *)twiddleCoef_rfft_4096
};
@ -195,90 +195,90 @@ extern const q31_t realCoefAQ31[8192];
extern const q31_t realCoefBQ31[8192];
const arm_rfft_instance_q31 arm_rfft_sR_q31_len32 = {
32u,
32U,
0,
1,
256u,
256U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len16
};
const arm_rfft_instance_q31 arm_rfft_sR_q31_len64 = {
64u,
64U,
0,
1,
128u,
128U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len32
};
const arm_rfft_instance_q31 arm_rfft_sR_q31_len128 = {
128u,
128U,
0,
1,
64u,
64U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len64
};
const arm_rfft_instance_q31 arm_rfft_sR_q31_len256 = {
256u,
256U,
0,
1,
32u,
32U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len128
};
const arm_rfft_instance_q31 arm_rfft_sR_q31_len512 = {
512u,
512U,
0,
1,
16u,
16U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len256
};
const arm_rfft_instance_q31 arm_rfft_sR_q31_len1024 = {
1024u,
1024U,
0,
1,
8u,
8U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len512
};
const arm_rfft_instance_q31 arm_rfft_sR_q31_len2048 = {
2048u,
2048U,
0,
1,
4u,
4U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len1024
};
const arm_rfft_instance_q31 arm_rfft_sR_q31_len4096 = {
4096u,
4096U,
0,
1,
2u,
2U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len2048
};
const arm_rfft_instance_q31 arm_rfft_sR_q31_len8192 = {
8192u,
8192U,
0,
1,
1u,
1U,
(q31_t*)realCoefAQ31,
(q31_t*)realCoefBQ31,
&arm_cfft_sR_q31_len4096
@ -289,90 +289,90 @@ extern const q15_t realCoefAQ15[8192];
extern const q15_t realCoefBQ15[8192];
const arm_rfft_instance_q15 arm_rfft_sR_q15_len32 = {
32u,
32U,
0,
1,
256u,
256U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len16
};
const arm_rfft_instance_q15 arm_rfft_sR_q15_len64 = {
64u,
64U,
0,
1,
128u,
128U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len32
};
const arm_rfft_instance_q15 arm_rfft_sR_q15_len128 = {
128u,
128U,
0,
1,
64u,
64U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len64
};
const arm_rfft_instance_q15 arm_rfft_sR_q15_len256 = {
256u,
256U,
0,
1,
32u,
32U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len128
};
const arm_rfft_instance_q15 arm_rfft_sR_q15_len512 = {
512u,
512U,
0,
1,
16u,
16U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len256
};
const arm_rfft_instance_q15 arm_rfft_sR_q15_len1024 = {
1024u,
1024U,
0,
1,
8u,
8U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len512
};
const arm_rfft_instance_q15 arm_rfft_sR_q15_len2048 = {
2048u,
2048U,
0,
1,
4u,
4U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len1024
};
const arm_rfft_instance_q15 arm_rfft_sR_q15_len4096 = {
4096u,
4096U,
0,
1,
2u,
2U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len2048
};
const arm_rfft_instance_q15 arm_rfft_sR_q15_len8192 = {
8192u,
8192U,
0,
1,
1u,
1U,
(q15_t*)realCoefAQ15,
(q15_t*)realCoefBQ15,
&arm_cfft_sR_q15_len4096

@ -82,11 +82,11 @@ void arm_cmplx_conj_f32(
float32_t inI1, inI2, inI3, inI4;
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
@ -131,13 +131,13 @@ void arm_cmplx_conj_f32(
pDst[5] = inI3;
/* increment source pointer by 8 to process next samples */
pSrc += 8u;
pSrc += 8U;
/* store imaginary sample to destination */
pDst[7] = inI4;
/* increment destination pointer by 8 to store next samples */
pDst += 8u;
pDst += 8U;
/* Decrement the loop counter */
blkCnt--;
@ -145,7 +145,7 @@ void arm_cmplx_conj_f32(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
#else
@ -154,7 +154,7 @@ void arm_cmplx_conj_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* realOut + j (imagOut) = realIn + j (-1) imagIn */
/* Calculate Complex Conjugate and then store the results in the destination buffer. */

@ -64,11 +64,11 @@ void arm_cmplx_conj_q15(
q31_t zero = 0;
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
@ -109,9 +109,9 @@ void arm_cmplx_conj_q15(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
@ -128,7 +128,7 @@ void arm_cmplx_conj_q15(
/* Run the below code for Cortex-M0 */
while (numSamples > 0u)
while (numSamples > 0U)
{
/* realOut + j (imagOut) = realIn+ j (-1) imagIn */
/* Calculate Complex Conjugate and then store the results in the destination buffer. */

@ -65,11 +65,11 @@ void arm_cmplx_conj_q31(
q31_t inI1, inI2, inI3, inI4; /* Temporary imaginary variables */
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
@ -125,14 +125,14 @@ void arm_cmplx_conj_q31(
pDst[3] = inI2;
/* increment source pointer by 8 to process next samples */
pSrc += 8u;
pSrc += 8U;
/* store imaginary input samples */
pDst[5] = inI3;
pDst[7] = inI4;
/* increment destination pointer by 8 to process next samples */
pDst += 8u;
pDst += 8U;
/* Decrement the loop counter */
blkCnt--;
@ -140,7 +140,7 @@ void arm_cmplx_conj_q31(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
#else
@ -150,7 +150,7 @@ void arm_cmplx_conj_q31(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
/* Calculate Complex Conjugate and then store the results in the destination buffer. */

@ -89,11 +89,11 @@ void arm_cmplx_dot_prod_f32(
uint32_t blkCnt; /* loop counter */
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
@ -141,9 +141,9 @@ void arm_cmplx_dot_prod_f32(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples & 0x3u;
blkCnt = numSamples & 0x3U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
@ -163,7 +163,7 @@ void arm_cmplx_dot_prod_f32(
/* Run the below code for Cortex-M0 */
while (numSamples > 0u)
while (numSamples > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;

@ -72,11 +72,11 @@ void arm_cmplx_dot_prod_q15(
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
@ -124,9 +124,9 @@ void arm_cmplx_dot_prod_q15(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
@ -146,7 +146,7 @@ void arm_cmplx_dot_prod_q15(
/* Run the below code for Cortex-M0 */
while (numSamples > 0u)
while (numSamples > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;

@ -73,11 +73,11 @@ void arm_cmplx_dot_prod_q31(
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
@ -125,9 +125,9 @@ void arm_cmplx_dot_prod_q31(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;
@ -147,7 +147,7 @@ void arm_cmplx_dot_prod_q31(
/* Run the below code for Cortex-M0 */
while (numSamples > 0u)
while (numSamples > 0U)
{
a0 = *pSrcA++;
b0 = *pSrcA++;

@ -82,11 +82,11 @@ void arm_cmplx_mag_f32(
uint32_t blkCnt; /* loop counter */
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
@ -114,9 +114,9 @@ void arm_cmplx_mag_f32(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
realIn = *pSrc++;
@ -132,7 +132,7 @@ void arm_cmplx_mag_f32(
/* Run the below code for Cortex-M0 */
while (numSamples > 0u)
while (numSamples > 0U)
{
/* out = sqrt((real * real) + (imag * imag)) */
realIn = *pSrc++;

@ -66,11 +66,11 @@ void arm_cmplx_mag_q15(
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
@ -96,9 +96,9 @@ void arm_cmplx_mag_q15(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
in1 = *__SIMD32(pSrc)++;
@ -116,7 +116,7 @@ void arm_cmplx_mag_q15(
/* Run the below code for Cortex-M0 */
q15_t real, imag; /* Temporary variables to hold input values */
while (numSamples > 0u)
while (numSamples > 0U)
{
/* out = sqrt(real * real + imag * imag) */
real = *pSrc++;

@ -68,11 +68,11 @@ void arm_cmplx_mag_q31(
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* read complex input from source buffer */
real1 = pSrc[0];
@ -130,13 +130,13 @@ void arm_cmplx_mag_q31(
arm_sqrt_q31(out1, &pDst[2]);
/* increment source pointer by 8 to process next samples */
pSrc += 8u;
pSrc += 8U;
/* calculate square root */
arm_sqrt_q31(out3, &pDst[3]);
/* increment destination by 4 to process next samples */
pDst += 4u;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -144,7 +144,7 @@ void arm_cmplx_mag_q31(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
#else
@ -153,7 +153,7 @@ void arm_cmplx_mag_q31(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;

@ -86,11 +86,11 @@ void arm_cmplx_mag_squared_f32(
float32_t out1, out2, out3, out4; /* Temporary variables to hold output values */
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
/* read real input sample from source buffer */
@ -160,13 +160,13 @@ void arm_cmplx_mag_squared_f32(
pDst[2] = out3;
/* increment source pointer by 8 to process next samples */
pSrc += 8u;
pSrc += 8U;
/* store output to destination */
pDst[3] = out4;
/* increment destination pointer by 4 to process next samples */
pDst += 4u;
pDst += 4U;
/* Decrement the loop counter */
blkCnt--;
@ -174,7 +174,7 @@ void arm_cmplx_mag_squared_f32(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
#else
@ -184,7 +184,7 @@ void arm_cmplx_mag_squared_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;

@ -64,11 +64,11 @@ void arm_cmplx_mag_squared_q15(
q31_t acc2, acc3;
/*loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
in1 = *__SIMD32(pSrc)++;
@ -93,9 +93,9 @@ void arm_cmplx_mag_squared_q15(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
in1 = *__SIMD32(pSrc)++;
@ -113,7 +113,7 @@ void arm_cmplx_mag_squared_q15(
/* Run the below code for Cortex-M0 */
q15_t real, imag; /* Temporary variables to store real and imaginary values */
while (numSamples > 0u)
while (numSamples > 0U)
{
/* out = ((real * real) + (imag * imag)) */
real = *pSrc++;

@ -65,11 +65,11 @@ void arm_cmplx_mag_squared_q31(
uint32_t blkCnt; /* loop counter */
/* loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;
@ -106,9 +106,9 @@ void arm_cmplx_mag_squared_q31(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
real = *pSrc++;
@ -126,7 +126,7 @@ void arm_cmplx_mag_squared_q31(
/* Run the below code for Cortex-M0 */
while (numSamples > 0u)
while (numSamples > 0U)
{
/* out = ((real * real) + (imag * imag)) */
real = *pSrc++;

@ -86,11 +86,11 @@ void arm_cmplx_mult_cmplx_f32(
/* loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
@ -145,18 +145,18 @@ void arm_cmplx_mult_cmplx_f32(
acc4 = (b2 * c2);
*(pDst + 4) = acc1;
pSrcA += 8u;
pSrcA += 8U;
acc3 -= (b2 * d2);
acc4 += (a2 * d2);
*(pDst + 5) = acc2;
pSrcB += 8u;
pSrcB += 8U;
*(pDst + 6) = acc3;
*(pDst + 7) = acc4;
pDst += 8u;
pDst += 8U;
/* Decrement the numSamples loop counter */
blkCnt--;
@ -164,7 +164,7 @@ void arm_cmplx_mult_cmplx_f32(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
#else
@ -173,7 +173,7 @@ void arm_cmplx_mult_cmplx_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */

@ -64,11 +64,11 @@ void arm_cmplx_mult_cmplx_q15(
uint32_t blkCnt; /* loop counters */
/* loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
@ -126,9 +126,9 @@ void arm_cmplx_mult_cmplx_q15(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
@ -152,7 +152,7 @@ void arm_cmplx_mult_cmplx_q15(
/* Run the below code for Cortex-M0 */
while (numSamples > 0u)
while (numSamples > 0U)
{
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */

@ -68,11 +68,11 @@ void arm_cmplx_mult_cmplx_q31(
/* Run the below code for Cortex-M4 and Cortex-M3 */
/* loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
@ -174,9 +174,9 @@ void arm_cmplx_mult_cmplx_q31(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
@ -212,11 +212,11 @@ void arm_cmplx_mult_cmplx_q31(
/* Run the below code for Cortex-M0 */
/* loop Unrolling */
blkCnt = numSamples >> 1u;
blkCnt = numSamples >> 1U;
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
** a second loop below computes the remaining 1 sample. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
@ -272,9 +272,9 @@ void arm_cmplx_mult_cmplx_q31(
/* If the numSamples is not a multiple of 2, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x2u;
blkCnt = numSamples % 0x2U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */

@ -89,11 +89,11 @@ void arm_cmplx_mult_real_f32(
float32_t out5, out6, out7, out8; /* Temporary variables to hold output data */
/* loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
@ -164,20 +164,20 @@ void arm_cmplx_mult_real_f32(
pCmplxDst[4] = out5;
/* increment complex input buffer by 8 to process next samples */
pSrcCmplx += 8u;
pSrcCmplx += 8U;
/* store result to destination buffer */
pCmplxDst[5] = out6;
/* increment real input buffer by 4 to process next samples */
pSrcReal += 4u;
pSrcReal += 4U;
/* store result to destination buffer */
pCmplxDst[6] = out7;
pCmplxDst[7] = out8;
/* increment destination buffer by 8 to process next samples */
pCmplxDst += 8u;
pCmplxDst += 8U;
/* Decrement the numSamples loop counter */
blkCnt--;
@ -185,7 +185,7 @@ void arm_cmplx_mult_real_f32(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
#else
@ -194,7 +194,7 @@ void arm_cmplx_mult_real_f32(
#endif /* #if defined (ARM_MATH_DSP) */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */

@ -70,11 +70,11 @@ void arm_cmplx_mult_real_q15(
q31_t mul1, mul2, mul3, mul4; /* Temporary variables to hold intermediate data */
/* loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
@ -103,10 +103,10 @@ void arm_cmplx_mult_real_q15(
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* saturate the result */
out1 = (q15_t) __SSAT(mul1 >> 15u, 16);
out2 = (q15_t) __SSAT(mul2 >> 15u, 16);
out3 = (q15_t) __SSAT(mul3 >> 15u, 16);
out4 = (q15_t) __SSAT(mul4 >> 15u, 16);
out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
/* pack real and imaginary outputs and store them to destination */
*__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
@ -132,10 +132,10 @@ void arm_cmplx_mult_real_q15(
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
out1 = (q15_t) __SSAT(mul1 >> 15u, 16);
out2 = (q15_t) __SSAT(mul2 >> 15u, 16);
out3 = (q15_t) __SSAT(mul3 >> 15u, 16);
out4 = (q15_t) __SSAT(mul4 >> 15u, 16);
out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
*__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
*__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
@ -146,9 +146,9 @@ void arm_cmplx_mult_real_q15(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
@ -167,7 +167,7 @@ void arm_cmplx_mult_real_q15(
/* Run the below code for Cortex-M0 */
while (numSamples > 0u)
while (numSamples > 0U)
{
/* realOut = realA * realB. */
/* imagOut = imagA * realB. */

@ -69,11 +69,11 @@ void arm_cmplx_mult_real_q31(
q31_t out1, out2, out3, out4; /* Temporary variables to hold output data */
/* loop Unrolling */
blkCnt = numSamples >> 2u;
blkCnt = numSamples >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
@ -151,9 +151,9 @@ void arm_cmplx_mult_real_q31(
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = numSamples % 0x4u;
blkCnt = numSamples % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* C[2 * i] = A[2 * i] * B[i]. */
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
@ -187,7 +187,7 @@ void arm_cmplx_mult_real_q31(
/* Run the below code for Cortex-M0 */
while (numSamples > 0u)
while (numSamples > 0U)
{
/* realOut = realA * realB. */
/* imagOut = imagA * realB. */

@ -64,7 +64,7 @@ void arm_pid_init_f32(
if (resetStateFlag)
{
/* Clear the state buffer. The size will be always 3 samples */
memset(S->state, 0, 3u * sizeof(float32_t));
memset(S->state, 0, 3U * sizeof(float32_t));
}
}

@ -74,7 +74,7 @@ void arm_pid_init_q15(
if (resetStateFlag)
{
/* Clear the state buffer. The size will be always 3 samples */
memset(S->state, 0, 3u * sizeof(q15_t));
memset(S->state, 0, 3U * sizeof(q15_t));
}
#else
@ -98,7 +98,7 @@ void arm_pid_init_q15(
if (resetStateFlag)
{
/* Clear the state buffer. The size will be always 3 samples */
memset(S->state, 0, 3u * sizeof(q15_t));
memset(S->state, 0, 3U * sizeof(q15_t));
}
#endif /* #if defined (ARM_MATH_DSP) */

@ -85,7 +85,7 @@ void arm_pid_init_q31(
if (resetStateFlag)
{
/* Clear the state buffer. The size will be always 3 samples */
memset(S->state, 0, 3u * sizeof(q31_t));
memset(S->state, 0, 3U * sizeof(q31_t));
}
}

@ -45,7 +45,7 @@ void arm_pid_reset_f32(
{
/* Clear the state buffer. The size will be always 3 samples */
memset(S->state, 0, 3u * sizeof(float32_t));
memset(S->state, 0, 3U * sizeof(float32_t));
}
/**

@ -44,7 +44,7 @@ void arm_pid_reset_q15(
arm_pid_instance_q15 * S)
{
/* Reset state to zero. The size will be always 3 samples */
memset(S->state, 0, 3u * sizeof(q15_t));
memset(S->state, 0, 3U * sizeof(q15_t));
}
/**

@ -45,7 +45,7 @@ void arm_pid_reset_q31(
{
/* Clear the state buffer. The size will be always 3 samples */
memset(S->state, 0, 3u * sizeof(q31_t));
memset(S->state, 0, 3U * sizeof(q31_t));
}
/**

@ -87,7 +87,7 @@ void arm_biquad_cas_df1_32x64_init_q31(
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 4 * numStages */
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q63_t));
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q63_t));
/* Assign state pointer */
S->pState = pState;

@ -190,8 +190,8 @@ void arm_biquad_cas_df1_32x64_q31(
int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */
uint32_t sample, stage = S->numStages; /* loop counters */
q31_t acc_l, acc_h; /* temporary output */
uint32_t uShift = ((uint32_t) S->postShift + 1u);
uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
uint32_t uShift = ((uint32_t) S->postShift + 1U);
uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */
#if defined (ARM_MATH_DSP)
@ -219,11 +219,11 @@ void arm_biquad_cas_df1_32x64_q31(
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
sample = blockSize >> 2u;
sample = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -297,7 +297,7 @@ void arm_biquad_cas_df1_32x64_q31(
/* The result is converted to 1.31 */
/* Store the output in the destination buffer. */
*(pOut + 1u) = acc_h;
*(pOut + 1U) = acc_h;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
@ -329,7 +329,7 @@ void arm_biquad_cas_df1_32x64_q31(
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Store the output in the destination buffer in 1.31 format. */
*(pOut + 2u) = acc_h;
*(pOut + 2U) = acc_h;
/* Read the fourth input into Xn, to reuse the value */
Xn = *pIn++;
@ -363,7 +363,7 @@ void arm_biquad_cas_df1_32x64_q31(
acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
/* Store the output in the destination buffer in 1.31 format. */
*(pOut + 3u) = acc_h;
*(pOut + 3U) = acc_h;
/* Every time after the output is computed state should be updated. */
/* The states should be updated as: */
@ -375,7 +375,7 @@ void arm_biquad_cas_df1_32x64_q31(
Xn1 = Xn;
/* update output pointer */
pOut += 4u;
pOut += 4U;
/* decrement the loop counter */
sample--;
@ -383,9 +383,9 @@ void arm_biquad_cas_df1_32x64_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
sample = (blockSize & 0x3u);
sample = (blockSize & 0x3U);
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -476,7 +476,7 @@ void arm_biquad_cas_df1_32x64_q31(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;

@ -207,11 +207,11 @@ void arm_biquad_cascade_df1_f32(
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
sample = blockSize >> 2u;
sample = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (sample > 0u)
while (sample > 0U)
{
/* Read the first input */
Xn = *pIn++;
@ -286,9 +286,9 @@ void arm_biquad_cascade_df1_f32(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
sample = blockSize & 0x3u;
sample = blockSize & 0x3U;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -331,7 +331,7 @@ void arm_biquad_cascade_df1_f32(
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
#else
@ -358,7 +358,7 @@ void arm_biquad_cascade_df1_f32(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -400,7 +400,7 @@ void arm_biquad_cascade_df1_f32(
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
#endif /* #if defined (ARM_MATH_DSP) */

@ -104,11 +104,11 @@ void arm_biquad_cascade_df1_fast_q15(
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
sample = blockSize >> 1u;
sample = blockSize >> 1U;
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
** a second loop below computes the remaining 1 sample. */
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
@ -198,7 +198,7 @@ void arm_biquad_cascade_df1_fast_q15(
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
** No loop unrolling is used. */
if ((blockSize & 0x1u) != 0u)
if ((blockSize & 0x1U) != 0U)
{
/* Read the input */
in = *pIn++;
@ -264,7 +264,7 @@ void arm_biquad_cascade_df1_fast_q15(
/* Decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
}

@ -100,11 +100,11 @@ void arm_biquad_cascade_df1_fast_q31(
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
sample = blockSize >> 2u;
sample = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn;
@ -130,7 +130,7 @@ void arm_biquad_cascade_df1_fast_q31(
Yn2 = acc << shift;
/* Read the second input */
Xn2 = *(pIn + 1u);
Xn2 = *(pIn + 1U);
/* Store the output in the destination buffer. */
*pOut = Yn2;
@ -156,10 +156,10 @@ void arm_biquad_cascade_df1_fast_q31(
Yn1 = acc << shift;
/* Read the third input */
Xn1 = *(pIn + 2u);
Xn1 = *(pIn + 2U);
/* Store the output in the destination buffer. */
*(pOut + 1u) = Yn1;
*(pOut + 1U) = Yn1;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
@ -182,11 +182,11 @@ void arm_biquad_cascade_df1_fast_q31(
Yn2 = acc << shift;
/* Read the fourth input */
Xn = *(pIn + 3u);
Xn = *(pIn + 3U);
/* Store the output in the destination buffer. */
*(pOut + 2u) = Yn2;
pIn += 4u;
*(pOut + 2U) = Yn2;
pIn += 4U;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
/* acc = b0 * x[n] */
@ -217,8 +217,8 @@ void arm_biquad_cascade_df1_fast_q31(
Xn1 = Xn;
/* Store the output in the destination buffer. */
*(pOut + 3u) = Yn1;
pOut += 4u;
*(pOut + 3U) = Yn1;
pOut += 4U;
/* decrement the loop counter */
sample--;
@ -226,9 +226,9 @@ void arm_biquad_cascade_df1_fast_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
sample = (blockSize & 0x3u);
sample = (blockSize & 0x3U);
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;

@ -86,7 +86,7 @@ void arm_biquad_cascade_df1_init_f32(
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 4 * numStages */
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(float32_t));
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(float32_t));
/* Assign state pointer */
S->pState = pState;

@ -88,7 +88,7 @@ void arm_biquad_cascade_df1_init_q15(
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 4 * numStages */
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q15_t));
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q15_t));
/* Assign state pointer */
S->pState = pState;

@ -87,7 +87,7 @@ void arm_biquad_cascade_df1_init_q31(
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 4 * numStages */
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q31_t));
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q31_t));
/* Assign state pointer */
S->pState = pState;

@ -109,11 +109,11 @@ void arm_biquad_cascade_df1_q15(
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
sample = blockSize >> 1u;
sample = blockSize >> 1U;
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
** a second loop below computes the remaining 1 sample. */
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
@ -220,7 +220,7 @@ void arm_biquad_cascade_df1_q15(
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
** No loop unrolling is used. */
if ((blockSize & 0x1u) != 0u)
if ((blockSize & 0x1U) != 0U)
{
/* Read the input */
in = *pIn++;
@ -295,7 +295,7 @@ void arm_biquad_cascade_df1_q15(
/* Decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
#else
@ -334,7 +334,7 @@ void arm_biquad_cascade_df1_q15(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;

@ -65,8 +65,8 @@ void arm_biquad_cascade_df1_q31(
uint32_t blockSize)
{
q63_t acc; /* accumulator */
uint32_t uShift = ((uint32_t) S->postShift + 1u);
uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
uint32_t uShift = ((uint32_t) S->postShift + 1U);
uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */
q31_t *pIn = pSrc; /* input pointer initialization */
q31_t *pOut = pDst; /* output pointer initialization */
q31_t *pState = S->pState; /* pState pointer initialization */
@ -104,11 +104,11 @@ void arm_biquad_cascade_df1_q31(
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
*/
sample = blockSize >> 2u;
sample = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -246,9 +246,9 @@ void arm_biquad_cascade_df1_q31(
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
sample = (blockSize & 0x3u);
sample = (blockSize & 0x3U);
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;
@ -327,7 +327,7 @@ void arm_biquad_cascade_df1_q31(
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn = *pIn++;

@ -171,19 +171,19 @@ uint32_t blockSize)
b2 = pCoeffs[2];
a1 = pCoeffs[3];
/* Apply loop unrolling and compute 16 output values simultaneously. */
sample = blockSize >> 4u;
sample = blockSize >> 4U;
a2 = pCoeffs[4];
/*Reading the state values */
d1 = pState[0];
d2 = pState[1];
pCoeffs += 5u;
pCoeffs += 5U;
/* First part of the processing with loop unrolling. Compute 16 outputs at a time.
** a second loop below computes the remaining 1 to 15 samples. */
while (sample > 0u) {
while (sample > 0U) {
/* y[n] = b0 * x[n] + d1 */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -366,7 +366,7 @@ uint32_t blockSize)
}
sample = blockSize & 0xFu;
while (sample > 0u) {
while (sample > 0U) {
Xn1 = *pIn;
acc1 = b0 * Xn1 + d1;
@ -392,12 +392,12 @@ uint32_t blockSize)
/* decrement the loop counter */
stage--;
pState += 2u;
pState += 2U;
/*Reset the output working pointer */
pOut = pDst;
} while (stage > 0u);
} while (stage > 0U);
#elif defined(ARM_MATH_CM0_FAMILY)
@ -419,7 +419,7 @@ uint32_t blockSize)
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn1 = *pIn++;
@ -454,7 +454,7 @@ uint32_t blockSize)
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
#else
@ -480,11 +480,11 @@ uint32_t blockSize)
d2 = pState[1];
/* Apply loop unrolling and compute 4 output values simultaneously. */
sample = blockSize >> 2u;
sample = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (sample > 0u) {
while (sample > 0U) {
/* y[n] = b0 * x[n] + d1 */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -546,8 +546,8 @@ uint32_t blockSize)
sample--;
}
sample = blockSize & 0x3u;
while (sample > 0u) {
sample = blockSize & 0x3U;
while (sample > 0U) {
Xn1 = *pIn++;
p0 = b0 * Xn1;
@ -578,7 +578,7 @@ uint32_t blockSize)
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
#endif

@ -171,19 +171,19 @@ uint32_t blockSize)
b2 = pCoeffs[2];
a1 = pCoeffs[3];
/* Apply loop unrolling and compute 16 output values simultaneously. */
sample = blockSize >> 4u;
sample = blockSize >> 4U;
a2 = pCoeffs[4];
/*Reading the state values */
d1 = pState[0];
d2 = pState[1];
pCoeffs += 5u;
pCoeffs += 5U;
/* First part of the processing with loop unrolling. Compute 16 outputs at a time.
** a second loop below computes the remaining 1 to 15 samples. */
while (sample > 0u) {
while (sample > 0U) {
/* y[n] = b0 * x[n] + d1 */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -366,7 +366,7 @@ uint32_t blockSize)
}
sample = blockSize & 0xFu;
while (sample > 0u) {
while (sample > 0U) {
Xn1 = *pIn;
acc1 = b0 * Xn1 + d1;
@ -392,12 +392,12 @@ uint32_t blockSize)
/* decrement the loop counter */
stage--;
pState += 2u;
pState += 2U;
/*Reset the output working pointer */
pOut = pDst;
} while (stage > 0u);
} while (stage > 0U);
#elif defined(ARM_MATH_CM0_FAMILY)
@ -419,7 +419,7 @@ uint32_t blockSize)
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn1 = *pIn++;
@ -454,7 +454,7 @@ uint32_t blockSize)
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
#else
@ -480,11 +480,11 @@ uint32_t blockSize)
d2 = pState[1];
/* Apply loop unrolling and compute 4 output values simultaneously. */
sample = blockSize >> 2u;
sample = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (sample > 0u) {
while (sample > 0U) {
/* y[n] = b0 * x[n] + d1 */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -546,8 +546,8 @@ uint32_t blockSize)
sample--;
}
sample = blockSize & 0x3u;
while (sample > 0u) {
sample = blockSize & 0x3U;
while (sample > 0U) {
Xn1 = *pIn++;
p0 = b0 * Xn1;
@ -578,7 +578,7 @@ uint32_t blockSize)
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
#endif

@ -78,7 +78,7 @@ void arm_biquad_cascade_df2T_init_f32(
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 2 * numStages */
memset(pState, 0, (2u * (uint32_t) numStages) * sizeof(float32_t));
memset(pState, 0, (2U * (uint32_t) numStages) * sizeof(float32_t));
/* Assign state pointer */
S->pState = pState;

@ -78,7 +78,7 @@ void arm_biquad_cascade_df2T_init_f64(
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 2 * numStages */
memset(pState, 0, (2u * (uint32_t) numStages) * sizeof(float64_t));
memset(pState, 0, (2U * (uint32_t) numStages) * sizeof(float64_t));
/* Assign state pointer */
S->pState = pState;

@ -171,7 +171,7 @@ uint32_t blockSize)
b2 = pCoeffs[2];
a1 = pCoeffs[3];
/* Apply loop unrolling and compute 8 output values simultaneously. */
sample = blockSize >> 3u;
sample = blockSize >> 3U;
a2 = pCoeffs[4];
/*Reading the state values */
@ -180,11 +180,11 @@ uint32_t blockSize)
d1b = pState[2];
d2b = pState[3];
pCoeffs += 5u;
pCoeffs += 5U;
/* First part of the processing with loop unrolling. Compute 8 outputs at a time.
** a second loop below computes the remaining 1 to 7 samples. */
while (sample > 0u) {
while (sample > 0U) {
/* y[n] = b0 * x[n] + d1 */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -367,8 +367,8 @@ uint32_t blockSize)
d2b += a2 * acc8b;
}
sample = blockSize & 0x7u;
while (sample > 0u) {
sample = blockSize & 0x7U;
while (sample > 0U) {
/* Read the input */
Xn1a = *pIn++; //Channel a
Xn1b = *pIn++; //Channel b
@ -405,11 +405,11 @@ uint32_t blockSize)
/* decrement the loop counter */
stage--;
pState += 4u;
pState += 4U;
/*Reset the output working pointer */
pOut = pDst;
} while (stage > 0u);
} while (stage > 0U);
#elif defined(ARM_MATH_CM0_FAMILY)
@ -433,7 +433,7 @@ uint32_t blockSize)
sample = blockSize;
while (sample > 0u)
while (sample > 0U)
{
/* Read the input */
Xn1a = *pIn++; //Channel a
@ -475,7 +475,7 @@ uint32_t blockSize)
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
#else
@ -503,11 +503,11 @@ uint32_t blockSize)
d2b = pState[3];
/* Apply loop unrolling and compute 4 output values simultaneously. */
sample = blockSize >> 2u;
sample = blockSize >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (sample > 0u) {
while (sample > 0U) {
/* y[n] = b0 * x[n] + d1 */
/* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -613,8 +613,8 @@ uint32_t blockSize)
sample--;
}
sample = blockSize & 0x3u;
while (sample > 0u) {
sample = blockSize & 0x3U;
while (sample > 0U) {
Xn1a = *pIn++;
Xn1b = *pIn++;
@ -658,7 +658,7 @@ uint32_t blockSize)
/* decrement the loop counter */
stage--;
} while (stage > 0u);
} while (stage > 0U);
#endif

@ -78,7 +78,7 @@ void arm_biquad_cascade_stereo_df2T_init_f32(
S->pCoeffs = pCoeffs;
/* Clear state buffer and size is always 4 * numStages */
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(float32_t));
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(float32_t));
/* Assign state pointer */
S->pState = pState;

@ -168,8 +168,8 @@ void arm_conv_f32(
/* The algorithm is implemented in three stages.
The loop counters of each stage are initialized here. */
blockSize1 = srcBLen - 1u;
blockSize2 = srcALen - (srcBLen - 1u);
blockSize1 = srcBLen - 1U;
blockSize2 = srcALen - (srcBLen - 1U);
blockSize3 = blockSize1;
/* --------------------------
@ -184,7 +184,7 @@ void arm_conv_f32(
/* In this stage the MAC operations are increased by 1 for every iteration.
The count variable holds the number of MAC operations performed */
count = 1u;
count = 1U;
/* Working pointer of inputA */
px = pIn1;
@ -198,17 +198,17 @@ void arm_conv_f32(
* ----------------------*/
/* The first stage starts here */
while (blockSize1 > 0u)
while (blockSize1 > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0.0f;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count >> 2u;
k = count >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* x[0] * y[srcBLen - 1] */
sum += *px++ * *py--;
@ -228,9 +228,9 @@ void arm_conv_f32(
/* If the count is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = count % 0x4u;
k = count % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum += *px++ * *py--;
@ -267,11 +267,11 @@ void arm_conv_f32(
px = pIn1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pSrc2 = pIn2 + (srcBLen - 1U);
py = pSrc2;
/* count is the index by which the pointer pIn1 is to be incremented */
count = 0u;
count = 0U;
/* -------------------
* Stage2 process
@ -280,12 +280,12 @@ void arm_conv_f32(
/* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
* So, to loop unroll over blockSize2,
* srcBLen should be greater than or equal to 4 */
if (srcBLen >= 4u)
if (srcBLen >= 4U)
{
/* Loop unroll over blockSize2, by 4 */
blkCnt = blockSize2 >> 2u;
blkCnt = blockSize2 >> 2U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Set all accumulators to zero */
acc0 = 0.0f;
@ -299,7 +299,7 @@ void arm_conv_f32(
x2 = *(px++);
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -328,7 +328,7 @@ void arm_conv_f32(
c0 = *(py--);
/* Read x[4] sample */
x0 = *(px + 1u);
x0 = *(px + 1U);
/* Perform the multiply-accumulate */
/* acc0 += x[1] * y[srcBLen - 2] */
@ -344,7 +344,7 @@ void arm_conv_f32(
c0 = *(py--);
/* Read x[5] sample */
x1 = *(px + 2u);
x1 = *(px + 2U);
/* Perform the multiply-accumulates */
/* acc0 += x[2] * y[srcBLen - 3] */
@ -360,8 +360,8 @@ void arm_conv_f32(
c0 = *(py--);
/* Read x[6] sample */
x2 = *(px + 3u);
px += 4u;
x2 = *(px + 3U);
px += 4U;
/* Perform the multiply-accumulates */
/* acc0 += x[3] * y[srcBLen - 4] */
@ -378,9 +378,9 @@ void arm_conv_f32(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
@ -414,7 +414,7 @@ void arm_conv_f32(
*pOut++ = acc3;
/* Increment the pointer pIn1 index, count by 4 */
count += 4u;
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
@ -428,19 +428,19 @@ void arm_conv_f32(
/* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize2 % 0x4u;
blkCnt = blockSize2 % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0.0f;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += *px++ * *py--;
@ -454,9 +454,9 @@ void arm_conv_f32(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum += *px++ * *py--;
@ -485,7 +485,7 @@ void arm_conv_f32(
* the blockSize2 loop cannot be unrolled by 4 */
blkCnt = blockSize2;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0.0f;
@ -493,7 +493,7 @@ void arm_conv_f32(
/* srcBLen number of MACS should be performed */
k = srcBLen;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum += *px++ * *py--;
@ -533,28 +533,28 @@ void arm_conv_f32(
The blockSize3 variable holds the number of MAC operations performed */
/* Working pointer of inputA */
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pSrc2 = pIn2 + (srcBLen - 1U);
py = pSrc2;
/* -------------------
* Stage3 process
* ------------------*/
while (blockSize3 > 0u)
while (blockSize3 > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0.0f;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = blockSize3 >> 2u;
k = blockSize3 >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */
sum += *px++ * *py--;
@ -574,9 +574,9 @@ void arm_conv_f32(
/* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = blockSize3 % 0x4u;
k = blockSize3 % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
/* sum += x[srcALen-1] * y[srcBLen-1] */
@ -607,13 +607,13 @@ void arm_conv_f32(
uint32_t i, j; /* loop counters */
/* Loop to calculate convolution for output length number of times */
for (i = 0u; i < ((srcALen + srcBLen) - 1u); i++)
for (i = 0U; i < ((srcALen + srcBLen) - 1U); i++)
{
/* Initialize sum with zero to carry out MAC operations */
sum = 0.0f;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0u; j <= i; j++)
for (j = 0U; j <= i; j++)
{
/* Check the array limitations */
if ((((i - j) < srcBLen) && (j < srcALen)))

@ -127,13 +127,13 @@ void arm_conv_fast_opt_q15(
px = pIn2;
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
/* Copy smaller length input sequence in reverse order into second scratch buffer */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
*pScr2-- = *px++;
@ -147,9 +147,9 @@ void arm_conv_fast_opt_q15(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
*pScr2-- = *px++;
@ -162,11 +162,11 @@ void arm_conv_fast_opt_q15(
pScr1 = pScratch1;
/* Assuming scratch1 buffer is aligned by 32-bit */
/* Fill (srcBLen - 1u) zeros in scratch1 buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1u));
/* Fill (srcBLen - 1U) zeros in scratch1 buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1U));
/* Update temporary scratch pointer */
pScr1 += (srcBLen - 1u);
pScr1 += (srcBLen - 1U);
/* Copy bigger length sequence(srcALen) samples in scratch1 buffer */
@ -181,11 +181,11 @@ void arm_conv_fast_opt_q15(
#else
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = srcALen >> 2u;
k = srcALen >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* copy samples of the bigger length sequence into scratch1 buffer, in order */
*pScr1++ = *pIn1++;
@ -199,9 +199,9 @@ void arm_conv_fast_opt_q15(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = srcALen % 0x4u;
k = srcALen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy the remaining samples of the bigger length sequence into scratch1 buffer */
*pScr1++ = *pIn1++;
@ -215,20 +215,20 @@ void arm_conv_fast_opt_q15(
#ifndef UNALIGNED_SUPPORT_DISABLE
/* Fill (srcBLen - 1u) zeros at end of scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1u));
/* Fill (srcBLen - 1U) zeros at end of scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1U));
/* Update pointer */
pScr1 += (srcBLen - 1u);
pScr1 += (srcBLen - 1U);
#else
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = (srcBLen - 1u) >> 2u;
k = (srcBLen - 1U) >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* write zeros at the end of scratch1 buffer */
*pScr1++ = 0;
@ -242,9 +242,9 @@ void arm_conv_fast_opt_q15(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = (srcBLen - 1u) % 0x4u;
k = (srcBLen - 1U) % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* write zeros for the remaining samples */
*pScr1++ = 0;
@ -266,7 +266,7 @@ void arm_conv_fast_opt_q15(
** a second loop below process for the remaining 1 to 3 samples. */
/* Actual convolution process starts here */
blkCnt = (srcALen + srcBLen - 1u) >> 2;
blkCnt = (srcALen + srcBLen - 1U) >> 2;
while (blkCnt > 0)
{
@ -285,16 +285,16 @@ void arm_conv_fast_opt_q15(
/* Read next two samples from scratch1 buffer */
x2 = *__SIMD32(pScr1)++;
tapCnt = (srcBLen) >> 2u;
tapCnt = (srcBLen) >> 2U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
#ifndef UNALIGNED_SUPPORT_DISABLE
/* Read four samples from smaller buffer */
y1 = _SIMD32_OFFSET(pIn2);
y2 = _SIMD32_OFFSET(pIn2 + 2u);
y2 = _SIMD32_OFFSET(pIn2 + 2U);
/* multiply and accumlate */
acc0 = __SMLAD(x1, y1, acc0);
@ -327,7 +327,7 @@ void arm_conv_fast_opt_q15(
acc3 = __SMLADX(x3, y1, acc3);
acc1 = __SMLADX(x3, y2, acc1);
x2 = _SIMD32_OFFSET(pScr1 + 2u);
x2 = _SIMD32_OFFSET(pScr1 + 2U);
#ifndef ARM_MATH_BIG_ENDIAN
x3 = __PKHBT(x2, x1, 0);
@ -412,8 +412,8 @@ void arm_conv_fast_opt_q15(
#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
/* update scratch pointers */
pIn2 += 4u;
pScr1 += 4u;
pIn2 += 4U;
pScr1 += 4U;
/* Decrement the loop counter */
@ -421,12 +421,12 @@ void arm_conv_fast_opt_q15(
}
/* Update scratch pointer for remaining samples of smaller length sequence */
pScr1 -= 4u;
pScr1 -= 4U;
/* apply the same as above for the remaining samples of the smaller length sequence */
tapCnt = (srcBLen) & 3u;
tapCnt = (srcBLen) & 3U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* accumlate the results */
@ -435,7 +435,7 @@ void arm_conv_fast_opt_q15(
acc2 += (*pScr1++ * *pIn2);
acc3 += (*pScr1++ * *pIn2++);
pScr1 -= 3u;
pScr1 -= 3U;
/* Decrement the loop counter */
tapCnt--;
@ -470,12 +470,12 @@ void arm_conv_fast_opt_q15(
/* Initialization of inputB pointer */
pIn2 = py;
pScratch1 += 4u;
pScratch1 += 4U;
}
blkCnt = (srcALen + srcBLen - 1u) & 0x3;
blkCnt = (srcALen + srcBLen - 1U) & 0x3;
/* Calculate convolution for remaining samples of Bigger length sequence */
while (blkCnt > 0)
@ -486,9 +486,9 @@ void arm_conv_fast_opt_q15(
/* Clear accumulators */
acc0 = 0;
tapCnt = (srcBLen) >> 1u;
tapCnt = (srcBLen) >> 1U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
acc0 += (*pScr1++ * *pIn2++);
@ -498,10 +498,10 @@ void arm_conv_fast_opt_q15(
tapCnt--;
}
tapCnt = (srcBLen) & 1u;
tapCnt = (srcBLen) & 1U;
/* apply the same as above for the remaining samples of the smaller length sequence */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* accumulate the results */
@ -520,7 +520,7 @@ void arm_conv_fast_opt_q15(
/* Initialization of inputB pointer */
pIn2 = py;
pScratch1 += 1u;
pScratch1 += 1U;
}

@ -116,8 +116,8 @@ void arm_conv_fast_q15(
/* The algorithm is implemented in three stages.
The loop counters of each stage are initialized here. */
blockSize1 = srcBLen - 1u;
blockSize2 = srcALen - (srcBLen - 1u);
blockSize1 = srcBLen - 1U;
blockSize2 = srcALen - (srcBLen - 1U);
blockSize3 = blockSize1;
/* --------------------------
@ -132,7 +132,7 @@ void arm_conv_fast_q15(
/* In this stage the MAC operations are increased by 1 for every iteration.
The count variable holds the number of MAC operations performed */
count = 1u;
count = 1U;
/* Working pointer of inputA */
px = pIn1;
@ -150,7 +150,7 @@ void arm_conv_fast_q15(
/* Second part of this stage computes the MAC operations greater than or equal to 4 */
/* The first part of the stage starts here */
while ((count < 4u) && (blockSize1 > 0u))
while ((count < 4U) && (blockSize1 > 0U))
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -159,7 +159,7 @@ void arm_conv_fast_q15(
* inputA samples and inputB samples */
k = count;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum = __SMLAD(*px++, *py--, sum);
@ -188,17 +188,17 @@ void arm_conv_fast_q15(
* y[srcBLen] and y[srcBLen-1] coefficients, py is decremented by 1 */
py = py - 1;
while (blockSize1 > 0u)
while (blockSize1 > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count >> 2u;
k = count >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
/* x[0], x[1] are multiplied with y[srcBLen - 1], y[srcBLen - 2] respectively */
@ -212,13 +212,13 @@ void arm_conv_fast_q15(
/* For the next MAC operations, the pointer py is used without SIMD
* So, py is incremented by 1 */
py = py + 1u;
py = py + 1U;
/* If the count is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = count % 0x4u;
k = count % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum = __SMLAD(*px++, *py--, sum);
@ -231,7 +231,7 @@ void arm_conv_fast_q15(
*pOut++ = (q15_t) (sum >> 15);
/* Update the inputA and inputB pointers for next MAC calculation */
py = pIn2 + (count - 1u);
py = pIn2 + (count - 1U);
px = pIn1;
/* Increment the MAC count */
@ -255,11 +255,11 @@ void arm_conv_fast_q15(
px = pIn1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pSrc2 = pIn2 + (srcBLen - 1U);
py = pSrc2;
/* count is the index by which the pointer pIn1 is to be incremented */
count = 0u;
count = 0U;
/* --------------------
@ -269,14 +269,14 @@ void arm_conv_fast_q15(
/* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
* So, to loop unroll over blockSize2,
* srcBLen should be greater than or equal to 4 */
if (srcBLen >= 4u)
if (srcBLen >= 4U)
{
/* Loop unroll over blockSize2, by 4 */
blkCnt = blockSize2 >> 2u;
blkCnt = blockSize2 >> 2U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
py = py - 1u;
py = py - 1U;
/* Set all accumulators to zero */
acc0 = 0;
@ -289,11 +289,11 @@ void arm_conv_fast_q15(
x0 = *__SIMD32(px);
/* read x[1], x[2] samples */
x1 = _SIMD32_OFFSET(px+1);
px+= 2u;
px+= 2U;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -335,7 +335,7 @@ void arm_conv_fast_q15(
/* Read x[5], x[6] */
x1 = _SIMD32_OFFSET(px+3);
px += 4u;
px += 4U;
/* acc2 += x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
acc2 = __SMLADX(x0, c0, acc2);
@ -350,16 +350,16 @@ void arm_conv_fast_q15(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
if (k == 1u)
if (k == 1U)
{
/* Read y[srcBLen - 5] */
c0 = *(py+1);
#ifdef ARM_MATH_BIG_ENDIAN
c0 = c0 << 16u;
c0 = c0 << 16U;
#else
@ -378,7 +378,7 @@ void arm_conv_fast_q15(
acc3 = __SMLADX(x3, c0, acc3);
}
if (k == 2u)
if (k == 2U)
{
/* Read y[srcBLen - 5], y[srcBLen - 6] */
c0 = _SIMD32_OFFSET(py);
@ -388,7 +388,7 @@ void arm_conv_fast_q15(
/* Read x[9] */
x2 = _SIMD32_OFFSET(px+1);
px += 2u;
px += 2U;
/* Perform the multiply-accumulates */
acc0 = __SMLADX(x0, c0, acc0);
@ -397,7 +397,7 @@ void arm_conv_fast_q15(
acc3 = __SMLADX(x2, c0, acc3);
}
if (k == 3u)
if (k == 3U)
{
/* Read y[srcBLen - 5], y[srcBLen - 6] */
c0 = _SIMD32_OFFSET(py);
@ -418,7 +418,7 @@ void arm_conv_fast_q15(
c0 = *(py-1);
#ifdef ARM_MATH_BIG_ENDIAN
c0 = c0 << 16u;
c0 = c0 << 16U;
#else
c0 = c0 & 0x0000FFFF;
@ -426,7 +426,7 @@ void arm_conv_fast_q15(
/* Read x[10] */
x3 = _SIMD32_OFFSET(px+2);
px += 3u;
px += 3U;
/* Perform the multiply-accumulates */
acc0 = __SMLADX(x1, c0, acc0);
@ -449,7 +449,7 @@ void arm_conv_fast_q15(
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Increment the pointer pIn1 index, count by 4 */
count += 4u;
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
@ -461,19 +461,19 @@ void arm_conv_fast_q15(
/* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize2 % 0x4u;
blkCnt = blockSize2 % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -487,9 +487,9 @@ void arm_conv_fast_q15(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -518,7 +518,7 @@ void arm_conv_fast_q15(
* the blockSize2 loop cannot be unrolled by 4 */
blkCnt = blockSize2;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -526,7 +526,7 @@ void arm_conv_fast_q15(
/* srcBLen number of MACS should be performed */
k = srcBLen;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum += ((q31_t) * px++ * *py--);
@ -566,12 +566,12 @@ void arm_conv_fast_q15(
The blockSize3 variable holds the number of MAC operations performed */
/* Working pointer of inputA */
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pIn2 = pSrc2 - 1u;
pSrc2 = pIn2 + (srcBLen - 1U);
pIn2 = pSrc2 - 1U;
py = pIn2;
/* -------------------
@ -583,19 +583,19 @@ void arm_conv_fast_q15(
/* Second part of this stage computes the MAC operations less than or equal to 4 */
/* The first part of the stage starts here */
j = blockSize3 >> 2u;
j = blockSize3 >> 2U;
while ((j > 0u) && (blockSize3 > 0u))
while ((j > 0U) && (blockSize3 > 0U))
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = blockSize3 >> 2u;
k = blockSize3 >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* x[srcALen - srcBLen + 1], x[srcALen - srcBLen + 2] are multiplied
* with y[srcBLen - 1], y[srcBLen - 2] respectively */
@ -610,13 +610,13 @@ void arm_conv_fast_q15(
/* For the next MAC operations, the pointer py is used without SIMD
* So, py is incremented by 1 */
py = py + 1u;
py = py + 1U;
/* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = blockSize3 % 0x4u;
k = blockSize3 % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* sum += x[srcALen - srcBLen + 5] * y[srcBLen - 5] */
sum = __SMLAD(*px++, *py--, sum);
@ -641,9 +641,9 @@ void arm_conv_fast_q15(
/* The second part of the stage starts here */
/* SIMD is not used for the next MAC operations,
* so pointer py is updated to read only one sample at a time */
py = py + 1u;
py = py + 1U;
while (blockSize3 > 0u)
while (blockSize3 > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -651,7 +651,7 @@ void arm_conv_fast_q15(
/* The loop counter is blockSize3 itself; the count is not divided by 4 here. */
k = blockSize3;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
/* sum += x[srcALen-1] * y[srcBLen-1] */
@ -720,8 +720,8 @@ void arm_conv_fast_q15(
/* The algorithm is implemented in three stages.
The loop counters of each stage are initialized here. */
blockSize1 = srcBLen - 1u;
blockSize2 = srcALen - (srcBLen - 1u);
blockSize1 = srcBLen - 1U;
blockSize2 = srcALen - (srcBLen - 1U);
blockSize3 = blockSize1;
/* --------------------------
@ -736,7 +736,7 @@ void arm_conv_fast_q15(
/* In this stage the MAC operations are increased by 1 for every iteration.
The count variable holds the number of MAC operations performed */
count = 1u;
count = 1U;
/* Working pointer of inputA */
px = pIn1;
@ -754,7 +754,7 @@ void arm_conv_fast_q15(
/* Second part of this stage computes the MAC operations greater than or equal to 4 */
/* The first part of the stage starts here */
while ((count < 4u) && (blockSize1 > 0u))
while ((count < 4U) && (blockSize1 > 0U))
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -763,7 +763,7 @@ void arm_conv_fast_q15(
* inputA samples and inputB samples */
k = count;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -792,19 +792,19 @@ void arm_conv_fast_q15(
* y[srcBLen] and y[srcBLen-1] coefficients, py is decremented by 1 */
py = py - 1;
while (blockSize1 > 0u)
while (blockSize1 > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count >> 2u;
k = count >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
py++;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -818,9 +818,9 @@ void arm_conv_fast_q15(
/* If the count is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = count % 0x4u;
k = count % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -833,7 +833,7 @@ void arm_conv_fast_q15(
*pOut++ = (q15_t) (sum >> 15);
/* Update the inputA and inputB pointers for next MAC calculation */
py = pIn2 + (count - 1u);
py = pIn2 + (count - 1U);
px = pIn1;
/* Increment the MAC count */
@ -857,11 +857,11 @@ void arm_conv_fast_q15(
px = pIn1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pSrc2 = pIn2 + (srcBLen - 1U);
py = pSrc2;
/* count is the index by which the pointer pIn1 is to be incremented */
count = 0u;
count = 0U;
/* --------------------
@ -871,14 +871,14 @@ void arm_conv_fast_q15(
/* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
* So, to loop unroll over blockSize2,
* srcBLen should be greater than or equal to 4 */
if (srcBLen >= 4u)
if (srcBLen >= 4U)
{
/* Loop unroll over blockSize2, by 4 */
blkCnt = blockSize2 >> 2u;
blkCnt = blockSize2 >> 2U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
py = py - 1u;
py = py - 1U;
/* Set all accumulators to zero */
acc0 = 0;
@ -905,7 +905,7 @@ void arm_conv_fast_q15(
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -995,7 +995,7 @@ void arm_conv_fast_q15(
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
px += 4u;
px += 4U;
/* acc2 += x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
acc2 = __SMLADX(x0, c0, acc2);
@ -1010,16 +1010,16 @@ void arm_conv_fast_q15(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
if (k == 1u)
if (k == 1U)
{
/* Read y[srcBLen - 5] */
c0 = *(py+1);
#ifdef ARM_MATH_BIG_ENDIAN
c0 = c0 << 16u;
c0 = c0 << 16U;
#else
@ -1050,7 +1050,7 @@ void arm_conv_fast_q15(
acc3 = __SMLADX(x3, c0, acc3);
}
if (k == 2u)
if (k == 2U)
{
/* Read y[srcBLen - 5], y[srcBLen - 6] */
a = *py;
@ -1083,7 +1083,7 @@ void arm_conv_fast_q15(
x2 = __PKHBT(a, b, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
px += 2u;
px += 2U;
/* Perform the multiply-accumulates */
acc0 = __SMLADX(x0, c0, acc0);
@ -1092,7 +1092,7 @@ void arm_conv_fast_q15(
acc3 = __SMLADX(x2, c0, acc3);
}
if (k == 3u)
if (k == 3U)
{
/* Read y[srcBLen - 5], y[srcBLen - 6] */
a = *py;
@ -1136,7 +1136,7 @@ void arm_conv_fast_q15(
c0 = *(py-1);
#ifdef ARM_MATH_BIG_ENDIAN
c0 = c0 << 16u;
c0 = c0 << 16U;
#else
c0 = c0 & 0x0000FFFF;
@ -1156,7 +1156,7 @@ void arm_conv_fast_q15(
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
px += 3u;
px += 3U;
/* Perform the multiply-accumulates */
acc0 = __SMLADX(x1, c0, acc0);
@ -1172,7 +1172,7 @@ void arm_conv_fast_q15(
*pOut++ = (q15_t)(acc3 >> 15);
/* Increment the pointer pIn1 index, count by 4 */
count += 4u;
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
@ -1184,19 +1184,19 @@ void arm_conv_fast_q15(
/* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize2 % 0x4u;
blkCnt = blockSize2 % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -1210,9 +1210,9 @@ void arm_conv_fast_q15(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -1241,7 +1241,7 @@ void arm_conv_fast_q15(
* the blockSize2 loop cannot be unrolled by 4 */
blkCnt = blockSize2;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -1249,7 +1249,7 @@ void arm_conv_fast_q15(
/* srcBLen number of MACS should be performed */
k = srcBLen;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum += ((q31_t) * px++ * *py--);
@ -1289,12 +1289,12 @@ void arm_conv_fast_q15(
The blockSize3 variable holds the number of MAC operations performed */
/* Working pointer of inputA */
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pIn2 = pSrc2 - 1u;
pSrc2 = pIn2 + (srcBLen - 1U);
pIn2 = pSrc2 - 1U;
py = pIn2;
/* -------------------
@ -1306,21 +1306,21 @@ void arm_conv_fast_q15(
/* Second part of this stage computes the MAC operations less than or equal to 4 */
/* The first part of the stage starts here */
j = blockSize3 >> 2u;
j = blockSize3 >> 2U;
while ((j > 0u) && (blockSize3 > 0u))
while ((j > 0U) && (blockSize3 > 0U))
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = blockSize3 >> 2u;
k = blockSize3 >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
py++;
while (k > 0u)
while (k > 0U)
{
sum += ((q31_t) * px++ * *py--);
sum += ((q31_t) * px++ * *py--);
@ -1332,9 +1332,9 @@ void arm_conv_fast_q15(
/* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = blockSize3 % 0x4u;
k = blockSize3 % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* sum += x[srcALen - srcBLen + 5] * y[srcBLen - 5] */
sum += ((q31_t) * px++ * *py--);
@ -1359,9 +1359,9 @@ void arm_conv_fast_q15(
/* The second part of the stage starts here */
/* SIMD is not used for the next MAC operations,
* so pointer py is updated to read only one sample at a time */
py = py + 1u;
py = py + 1U;
while (blockSize3 > 0u)
while (blockSize3 > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -1369,7 +1369,7 @@ void arm_conv_fast_q15(
/* The loop counter is blockSize3 itself; the count is not divided by 4 here. */
k = blockSize3;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
/* sum += x[srcALen-1] * y[srcBLen-1] */

@ -117,8 +117,8 @@ void arm_conv_fast_q31(
/* The algorithm is implemented in three stages.
The loop counters of each stage are initialized here. */
blockSize1 = srcBLen - 1u;
blockSize2 = srcALen - (srcBLen - 1u);
blockSize1 = srcBLen - 1U;
blockSize2 = srcALen - (srcBLen - 1U);
blockSize3 = blockSize1;
/* --------------------------
@ -133,7 +133,7 @@ void arm_conv_fast_q31(
/* In this stage the MAC operations are increased by 1 for every iteration.
The count variable holds the number of MAC operations performed */
count = 1u;
count = 1U;
/* Working pointer of inputA */
px = pIn1;
@ -147,17 +147,17 @@ void arm_conv_fast_q31(
* ----------------------*/
/* The first stage starts here */
while (blockSize1 > 0u)
while (blockSize1 > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count >> 2u;
k = count >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* x[0] * y[srcBLen - 1] */
sum = (q31_t) ((((q63_t) sum << 32) +
@ -181,9 +181,9 @@ void arm_conv_fast_q31(
/* If the count is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = count % 0x4u;
k = count % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum = (q31_t) ((((q63_t) sum << 32) +
@ -221,11 +221,11 @@ void arm_conv_fast_q31(
px = pIn1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pSrc2 = pIn2 + (srcBLen - 1U);
py = pSrc2;
/* count is index by which the pointer pIn1 to be incremented */
count = 0u;
count = 0U;
/* -------------------
* Stage2 process
@ -234,12 +234,12 @@ void arm_conv_fast_q31(
/* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
* So, to loop unroll over blockSize2,
* srcBLen should be greater than or equal to 4 */
if (srcBLen >= 4u)
if (srcBLen >= 4U)
{
/* Loop unroll over blockSize2, by 4 */
blkCnt = blockSize2 >> 2u;
blkCnt = blockSize2 >> 2U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Set all accumulators to zero */
acc0 = 0;
@ -253,7 +253,7 @@ void arm_conv_fast_q31(
x2 = *(px++);
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -331,9 +331,9 @@ void arm_conv_fast_q31(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
@ -367,7 +367,7 @@ void arm_conv_fast_q31(
*pOut++ = (q31_t) (acc3 << 1);
/* Increment the pointer pIn1 index, count by 4 */
count += 4u;
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
@ -379,19 +379,19 @@ void arm_conv_fast_q31(
/* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize2 % 0x4u;
blkCnt = blockSize2 % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum = (q31_t) ((((q63_t) sum << 32) +
@ -409,9 +409,9 @@ void arm_conv_fast_q31(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum = (q31_t) ((((q63_t) sum << 32) +
@ -441,7 +441,7 @@ void arm_conv_fast_q31(
* the blockSize2 loop cannot be unrolled by 4 */
blkCnt = blockSize2;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -449,7 +449,7 @@ void arm_conv_fast_q31(
/* srcBLen number of MACS should be performed */
k = srcBLen;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum = (q31_t) ((((q63_t) sum << 32) +
@ -490,28 +490,28 @@ void arm_conv_fast_q31(
The blockSize3 variable holds the number of MAC operations performed */
/* Working pointer of inputA */
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pSrc2 = pIn2 + (srcBLen - 1U);
py = pSrc2;
/* -------------------
* Stage3 process
* ------------------*/
while (blockSize3 > 0u)
while (blockSize3 > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = blockSize3 >> 2u;
k = blockSize3 >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */
sum = (q31_t) ((((q63_t) sum << 32) +
@ -535,9 +535,9 @@ void arm_conv_fast_q31(
/* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = blockSize3 % 0x4u;
k = blockSize3 % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum = (q31_t) ((((q63_t) sum << 32) +

@ -130,12 +130,12 @@ void arm_conv_opt_q15(
px = pIn2;
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
/* Copy smaller length input sequence in reverse order into second scratch buffer */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
*pScr2-- = *px++;
@ -149,9 +149,9 @@ void arm_conv_opt_q15(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
*pScr2-- = *px++;
@ -164,11 +164,11 @@ void arm_conv_opt_q15(
pScr1 = pScratch1;
/* Assuming scratch1 buffer is aligned by 32-bit */
/* Fill (srcBLen - 1u) zeros in scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1u));
/* Fill (srcBLen - 1U) zeros in scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1U));
/* Update temporary scratch pointer */
pScr1 += (srcBLen - 1u);
pScr1 += (srcBLen - 1U);
/* Copy bigger length sequence(srcALen) samples in scratch1 buffer */
@ -183,11 +183,11 @@ void arm_conv_opt_q15(
#else
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = srcALen >> 2u;
k = srcALen >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
*pScr1++ = *pIn1++;
@ -201,9 +201,9 @@ void arm_conv_opt_q15(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = srcALen % 0x4u;
k = srcALen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
*pScr1++ = *pIn1++;
@ -217,20 +217,20 @@ void arm_conv_opt_q15(
#ifndef UNALIGNED_SUPPORT_DISABLE
/* Fill (srcBLen - 1u) zeros at end of scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1u));
/* Fill (srcBLen - 1U) zeros at end of scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1U));
/* Update pointer */
pScr1 += (srcBLen - 1u);
pScr1 += (srcBLen - 1U);
#else
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = (srcBLen - 1u) >> 2u;
k = (srcBLen - 1U) >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
*pScr1++ = 0;
@ -244,9 +244,9 @@ void arm_conv_opt_q15(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = (srcBLen - 1u) % 0x4u;
k = (srcBLen - 1U) % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
*pScr1++ = 0;
@ -268,7 +268,7 @@ void arm_conv_opt_q15(
** a second loop below process for the remaining 1 to 3 samples. */
/* Actual convolution process starts here */
blkCnt = (srcALen + srcBLen - 1u) >> 2;
blkCnt = (srcALen + srcBLen - 1U) >> 2;
while (blkCnt > 0)
{
@ -287,16 +287,16 @@ void arm_conv_opt_q15(
/* Read next two samples from scratch1 buffer */
x2 = *__SIMD32(pScr1)++;
tapCnt = (srcBLen) >> 2u;
tapCnt = (srcBLen) >> 2U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
#ifndef UNALIGNED_SUPPORT_DISABLE
/* Read four samples from smaller buffer */
y1 = _SIMD32_OFFSET(pIn2);
y2 = _SIMD32_OFFSET(pIn2 + 2u);
y2 = _SIMD32_OFFSET(pIn2 + 2U);
/* multiply and accumlate */
acc0 = __SMLALD(x1, y1, acc0);
@ -329,7 +329,7 @@ void arm_conv_opt_q15(
acc3 = __SMLALDX(x3, y1, acc3);
acc1 = __SMLALDX(x3, y2, acc1);
x2 = _SIMD32_OFFSET(pScr1 + 2u);
x2 = _SIMD32_OFFSET(pScr1 + 2U);
#ifndef ARM_MATH_BIG_ENDIAN
x3 = __PKHBT(x2, x1, 0);
@ -413,8 +413,8 @@ void arm_conv_opt_q15(
#endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
pIn2 += 4u;
pScr1 += 4u;
pIn2 += 4U;
pScr1 += 4U;
/* Decrement the loop counter */
@ -422,12 +422,12 @@ void arm_conv_opt_q15(
}
/* Update scratch pointer for remaining samples of smaller length sequence */
pScr1 -= 4u;
pScr1 -= 4U;
/* apply same above for remaining samples of smaller length sequence */
tapCnt = (srcBLen) & 3u;
tapCnt = (srcBLen) & 3U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* accumlate the results */
@ -436,7 +436,7 @@ void arm_conv_opt_q15(
acc2 += (*pScr1++ * *pIn2);
acc3 += (*pScr1++ * *pIn2++);
pScr1 -= 3u;
pScr1 -= 3U;
/* Decrement the loop counter */
tapCnt--;
@ -469,12 +469,12 @@ void arm_conv_opt_q15(
/* Initialization of inputB pointer */
pIn2 = py;
pScratch1 += 4u;
pScratch1 += 4U;
}
blkCnt = (srcALen + srcBLen - 1u) & 0x3;
blkCnt = (srcALen + srcBLen - 1U) & 0x3;
/* Calculate convolution for remaining samples of Bigger length sequence */
while (blkCnt > 0)
@ -485,9 +485,9 @@ void arm_conv_opt_q15(
/* Clear Accumlators */
acc0 = 0;
tapCnt = (srcBLen) >> 1u;
tapCnt = (srcBLen) >> 1U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Read next two samples from scratch1 buffer */
@ -498,10 +498,10 @@ void arm_conv_opt_q15(
tapCnt--;
}
tapCnt = (srcBLen) & 1u;
tapCnt = (srcBLen) & 1U;
/* apply same above for remaining samples of smaller length sequence */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* accumlate the results */
@ -521,7 +521,7 @@ void arm_conv_opt_q15(
/* Initialization of inputB pointer */
pIn2 = py;
pScratch1 += 1u;
pScratch1 += 1U;
}

@ -117,11 +117,11 @@ void arm_conv_opt_q7(
px = pIn2 + srcBLen - 1;
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
x4 = (q15_t) * px--;
@ -139,9 +139,9 @@ void arm_conv_opt_q7(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
x4 = (q15_t) * px--;
@ -154,19 +154,19 @@ void arm_conv_opt_q7(
/* Initialze temporary scratch pointer */
pScr1 = pScratch1;
/* Fill (srcBLen - 1u) zeros in scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1u));
/* Fill (srcBLen - 1U) zeros in scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1U));
/* Update temporary scratch pointer */
pScr1 += (srcBLen - 1u);
pScr1 += (srcBLen - 1U);
/* Copy (srcALen) samples in scratch buffer */
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = srcALen >> 2u;
k = srcALen >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
x4 = (q15_t) * pIn1++;
@ -184,9 +184,9 @@ void arm_conv_opt_q7(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = srcALen % 0x4u;
k = srcALen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
x4 = (q15_t) * pIn1++;
@ -198,20 +198,20 @@ void arm_conv_opt_q7(
#ifndef UNALIGNED_SUPPORT_DISABLE
/* Fill (srcBLen - 1u) zeros at end of scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1u));
/* Fill (srcBLen - 1U) zeros at end of scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1U));
/* Update pointer */
pScr1 += (srcBLen - 1u);
pScr1 += (srcBLen - 1U);
#else
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = (srcBLen - 1u) >> 2u;
k = (srcBLen - 1U) >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
*pScr1++ = 0;
@ -225,9 +225,9 @@ void arm_conv_opt_q7(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = (srcBLen - 1u) % 0x4u;
k = (srcBLen - 1U) % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
*pScr1++ = 0;
@ -247,7 +247,7 @@ void arm_conv_opt_q7(
pScr2 = py;
/* Actual convolution process starts here */
blkCnt = (srcALen + srcBLen - 1u) >> 2;
blkCnt = (srcALen + srcBLen - 1U) >> 2;
while (blkCnt > 0)
{
@ -266,9 +266,9 @@ void arm_conv_opt_q7(
/* Read next two samples from scratch1 buffer */
x2 = *__SIMD32(pScr1)++;
tapCnt = (srcBLen) >> 2u;
tapCnt = (srcBLen) >> 2U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Read four samples from smaller buffer */
@ -301,7 +301,7 @@ void arm_conv_opt_q7(
acc3 = __SMLADX(x3, y1, acc3);
/* Read four samples from smaller buffer */
y1 = _SIMD32_OFFSET(pScr2 + 2u);
y1 = _SIMD32_OFFSET(pScr2 + 2U);
acc0 = __SMLAD(x2, y1, acc0);
@ -319,7 +319,7 @@ void arm_conv_opt_q7(
acc3 = __SMLADX(x3, y1, acc3);
pScr2 += 4u;
pScr2 += 4U;
/* Decrement the loop counter */
@ -329,13 +329,13 @@ void arm_conv_opt_q7(
/* Update scratch pointer for remaining samples of smaller length sequence */
pScr1 -= 4u;
pScr1 -= 4U;
/* apply same above for remaining samples of smaller length sequence */
tapCnt = (srcBLen) & 3u;
tapCnt = (srcBLen) & 3U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* accumlate the results */
@ -344,7 +344,7 @@ void arm_conv_opt_q7(
acc2 += (*pScr1++ * *pScr2);
acc3 += (*pScr1++ * *pScr2++);
pScr1 -= 3u;
pScr1 -= 3U;
/* Decrement the loop counter */
tapCnt--;
@ -353,22 +353,22 @@ void arm_conv_opt_q7(
blkCnt--;
/* Store the result in the accumulator in the destination buffer. */
out0 = (q7_t) (__SSAT(acc0 >> 7u, 8));
out1 = (q7_t) (__SSAT(acc1 >> 7u, 8));
out2 = (q7_t) (__SSAT(acc2 >> 7u, 8));
out3 = (q7_t) (__SSAT(acc3 >> 7u, 8));
out0 = (q7_t) (__SSAT(acc0 >> 7U, 8));
out1 = (q7_t) (__SSAT(acc1 >> 7U, 8));
out2 = (q7_t) (__SSAT(acc2 >> 7U, 8));
out3 = (q7_t) (__SSAT(acc3 >> 7U, 8));
*__SIMD32(pOut)++ = __PACKq7(out0, out1, out2, out3);
/* Initialization of inputB pointer */
pScr2 = py;
pScratch1 += 4u;
pScratch1 += 4U;
}
blkCnt = (srcALen + srcBLen - 1u) & 0x3;
blkCnt = (srcALen + srcBLen - 1U) & 0x3;
/* Calculate convolution for remaining samples of Bigger length sequence */
while (blkCnt > 0)
@ -379,9 +379,9 @@ void arm_conv_opt_q7(
/* Clear Accumlators */
acc0 = 0;
tapCnt = (srcBLen) >> 1u;
tapCnt = (srcBLen) >> 1U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
acc0 += (*pScr1++ * *pScr2++);
acc0 += (*pScr1++ * *pScr2++);
@ -390,10 +390,10 @@ void arm_conv_opt_q7(
tapCnt--;
}
tapCnt = (srcBLen) & 1u;
tapCnt = (srcBLen) & 1U;
/* apply same above for remaining samples of smaller length sequence */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* accumlate the results */
@ -406,12 +406,12 @@ void arm_conv_opt_q7(
blkCnt--;
/* Store the result in the accumulator in the destination buffer. */
*pOut++ = (q7_t) (__SSAT(acc0 >> 7u, 8));
*pOut++ = (q7_t) (__SSAT(acc0 >> 7U, 8));
/* Initialization of inputB pointer */
pScr2 = py;
pScratch1 += 1u;
pScratch1 += 1U;
}

@ -103,13 +103,13 @@ arm_status arm_conv_partial_f32(
float32_t *pSrc1, *pSrc2; /* Intermediate pointers */
float32_t sum, acc0, acc1, acc2, acc3; /* Accumulator */
float32_t x0, x1, x2, x3, c0; /* Temporary variables to hold state and coefficient values */
uint32_t j, k, count = 0u, blkCnt, check;
uint32_t j, k, count = 0U, blkCnt, check;
int32_t blockSize1, blockSize2, blockSize3; /* loop counters */
arm_status status; /* status of Partial convolution */
/* Check for range of output samples to be calculated */
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
{
/* Set status as ARM_MATH_ARGUMENT_ERROR */
status = ARM_MATH_ARGUMENT_ERROR;
@ -148,7 +148,7 @@ arm_status arm_conv_partial_f32(
blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :
(int32_t) numPoints) : 0;
blockSize2 = ((int32_t) check - blockSize3) -
(blockSize1 + (int32_t) firstIndex);
@ -181,7 +181,7 @@ arm_status arm_conv_partial_f32(
The count variable holds the number of MAC operations performed.
Since the partial convolution starts from from firstIndex
Number of Macs to be performed is firstIndex + 1 */
count = 1u + firstIndex;
count = 1U + firstIndex;
/* Working pointer of inputA */
px = pIn1;
@ -201,11 +201,11 @@ arm_status arm_conv_partial_f32(
sum = 0.0f;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count >> 2u;
k = count >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* x[0] * y[srcBLen - 1] */
sum += *px++ * *py--;
@ -225,9 +225,9 @@ arm_status arm_conv_partial_f32(
/* If the count is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = count % 0x4u;
k = count % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += *px++ * *py--;
@ -271,11 +271,11 @@ arm_status arm_conv_partial_f32(
}
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pSrc2 = pIn2 + (srcBLen - 1U);
py = pSrc2;
/* count is index by which the pointer pIn1 to be incremented */
count = 0u;
count = 0U;
/* -------------------
* Stage2 process
@ -284,12 +284,12 @@ arm_status arm_conv_partial_f32(
/* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
* So, to loop unroll over blockSize2,
* srcBLen should be greater than or equal to 4 */
if (srcBLen >= 4u)
if (srcBLen >= 4U)
{
/* Loop unroll over blockSize2, by 4 */
blkCnt = ((uint32_t) blockSize2 >> 2u);
blkCnt = ((uint32_t) blockSize2 >> 2U);
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Set all accumulators to zero */
acc0 = 0.0f;
@ -303,7 +303,7 @@ arm_status arm_conv_partial_f32(
x2 = *(px++);
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -381,9 +381,9 @@ arm_status arm_conv_partial_f32(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
@ -417,7 +417,7 @@ arm_status arm_conv_partial_f32(
*pOut++ = acc3;
/* Increment the pointer pIn1 index, count by 1 */
count += 4u;
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
@ -429,19 +429,19 @@ arm_status arm_conv_partial_f32(
/* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = (uint32_t) blockSize2 % 0x4u;
blkCnt = (uint32_t) blockSize2 % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0.0f;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += *px++ * *py--;
@ -455,9 +455,9 @@ arm_status arm_conv_partial_f32(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum += *px++ * *py--;
@ -486,7 +486,7 @@ arm_status arm_conv_partial_f32(
* the blockSize2 loop cannot be unrolled by 4 */
blkCnt = (uint32_t) blockSize2;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0.0f;
@ -494,7 +494,7 @@ arm_status arm_conv_partial_f32(
/* srcBLen number of MACS should be performed */
k = srcBLen;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum += *px++ * *py--;
@ -532,14 +532,14 @@ arm_status arm_conv_partial_f32(
/* In this stage the MAC operations are decreased by 1 for every iteration.
The count variable holds the number of MAC operations performed */
count = srcBLen - 1u;
count = srcBLen - 1U;
/* Working pointer of inputA */
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pSrc2 = pIn2 + (srcBLen - 1U);
py = pSrc2;
while (blockSize3 > 0)
@ -548,11 +548,11 @@ arm_status arm_conv_partial_f32(
sum = 0.0f;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count >> 2u;
k = count >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */
sum += *px++ * *py--;
@ -572,9 +572,9 @@ arm_status arm_conv_partial_f32(
/* If the count is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = count % 0x4u;
k = count % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
/* sum += x[srcALen-1] * y[srcBLen-1] */
@ -617,7 +617,7 @@ arm_status arm_conv_partial_f32(
arm_status status; /* status of Partial convolution */
/* Check for range of output samples to be calculated */
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
{
/* Set status as ARM_ARGUMENT_ERROR */
status = ARM_MATH_ARGUMENT_ERROR;
@ -631,7 +631,7 @@ arm_status arm_conv_partial_f32(
sum = 0.0f;
/* Loop to perform MAC operations according to convolution equation */
for (j = 0u; j <= i; j++)
for (j = 0U; j <= i; j++)
{
/* Check the array limitations for inputs */
if ((((i - j) < srcBLen) && (j < srcALen)))

@ -88,7 +88,7 @@ arm_status arm_conv_partial_fast_opt_q15(
uint32_t tapCnt; /* loop count */
/* Check for range of output samples to be calculated */
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
{
/* Set status as ARM_MATH_ARGUMENT_ERROR */
status = ARM_MATH_ARGUMENT_ERROR;
@ -131,13 +131,13 @@ arm_status arm_conv_partial_fast_opt_q15(
px = pIn2;
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
/* Copy smaller length input sequence in reverse order into second scratch buffer */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
*pScr2-- = *px++;
@ -151,9 +151,9 @@ arm_status arm_conv_partial_fast_opt_q15(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
*pScr2-- = *px++;
@ -166,11 +166,11 @@ arm_status arm_conv_partial_fast_opt_q15(
pScr1 = pScratch1;
/* Assuming scratch1 buffer is aligned by 32-bit */
/* Fill (srcBLen - 1u) zeros in scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1u));
/* Fill (srcBLen - 1U) zeros in scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1U));
/* Update temporary scratch pointer */
pScr1 += (srcBLen - 1u);
pScr1 += (srcBLen - 1U);
/* Copy bigger length sequence(srcALen) samples in scratch1 buffer */
@ -180,11 +180,11 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Update pointers */
pScr1 += srcALen;
/* Fill (srcBLen - 1u) zeros at end of scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1u));
/* Fill (srcBLen - 1U) zeros at end of scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1U));
/* Update pointer */
pScr1 += (srcBLen - 1u);
pScr1 += (srcBLen - 1U);
/* Initialization of pIn2 pointer */
pIn2 = py;
@ -216,14 +216,14 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Read next two samples from scratch1 buffer */
x2 = *__SIMD32(pScr1)++;
tapCnt = (srcBLen) >> 2u;
tapCnt = (srcBLen) >> 2U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Read four samples from smaller buffer */
y1 = _SIMD32_OFFSET(pIn2);
y2 = _SIMD32_OFFSET(pIn2 + 2u);
y2 = _SIMD32_OFFSET(pIn2 + 2U);
/* multiply and accumlate */
acc0 = __SMLAD(x1, y1, acc0);
@ -257,7 +257,7 @@ arm_status arm_conv_partial_fast_opt_q15(
acc3 = __SMLADX(x3, y1, acc3);
acc1 = __SMLADX(x3, y2, acc1);
x2 = _SIMD32_OFFSET(pScr1 + 2u);
x2 = _SIMD32_OFFSET(pScr1 + 2U);
#ifndef ARM_MATH_BIG_ENDIAN
x3 = __PKHBT(x2, x1, 0);
@ -268,8 +268,8 @@ arm_status arm_conv_partial_fast_opt_q15(
acc3 = __SMLADX(x3, y2, acc3);
/* update scratch pointers */
pIn2 += 4u;
pScr1 += 4u;
pIn2 += 4U;
pScr1 += 4U;
/* Decrement the loop counter */
@ -277,12 +277,12 @@ arm_status arm_conv_partial_fast_opt_q15(
}
/* Update scratch pointer for remaining samples of smaller length sequence */
pScr1 -= 4u;
pScr1 -= 4U;
/* apply same above for remaining samples of smaller length sequence */
tapCnt = (srcBLen) & 3u;
tapCnt = (srcBLen) & 3U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* accumlate the results */
@ -291,7 +291,7 @@ arm_status arm_conv_partial_fast_opt_q15(
acc2 += (*pScr1++ * *pIn2);
acc3 += (*pScr1++ * *pIn2++);
pScr1 -= 3u;
pScr1 -= 3U;
/* Decrement the loop counter */
tapCnt--;
@ -321,7 +321,7 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Initialization of inputB pointer */
pIn2 = py;
pScratch1 += 4u;
pScratch1 += 4U;
}
@ -337,9 +337,9 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Clear Accumlators */
acc0 = 0;
tapCnt = (srcBLen) >> 1u;
tapCnt = (srcBLen) >> 1U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Read next two samples from scratch1 buffer */
@ -354,10 +354,10 @@ arm_status arm_conv_partial_fast_opt_q15(
tapCnt--;
}
tapCnt = (srcBLen) & 1u;
tapCnt = (srcBLen) & 1U;
/* apply same above for remaining samples of smaller length sequence */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* accumlate the results */
@ -376,7 +376,7 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Initialization of inputB pointer */
pIn2 = py;
pScratch1 += 1u;
pScratch1 += 1U;
}
/* set status as ARM_MATH_SUCCESS */
@ -416,7 +416,7 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Check for range of output samples to be calculated */
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
{
/* Set status as ARM_MATH_ARGUMENT_ERROR */
status = ARM_MATH_ARGUMENT_ERROR;
@ -459,11 +459,11 @@ arm_status arm_conv_partial_fast_opt_q15(
px = pIn2;
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
*pScr2-- = *px++;
@ -477,9 +477,9 @@ arm_status arm_conv_partial_fast_opt_q15(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
*pScr2-- = *px++;
@ -491,21 +491,21 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Initialze temporary scratch pointer */
pScr1 = pScratch1;
/* Fill (srcBLen - 1u) zeros in scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1u));
/* Fill (srcBLen - 1U) zeros in scratch buffer */
arm_fill_q15(0, pScr1, (srcBLen - 1U));
/* Update temporary scratch pointer */
pScr1 += (srcBLen - 1u);
pScr1 += (srcBLen - 1U);
/* Copy bigger length sequence(srcALen) samples in scratch1 buffer */
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = srcALen >> 2u;
k = srcALen >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
*pScr1++ = *pIn1++;
@ -519,9 +519,9 @@ arm_status arm_conv_partial_fast_opt_q15(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = srcALen % 0x4u;
k = srcALen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
*pScr1++ = *pIn1++;
@ -532,11 +532,11 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Apply loop unrolling and do 4 Copies simultaneously. */
k = (srcBLen - 1u) >> 2u;
k = (srcBLen - 1U) >> 2U;
/* First part of the processing with loop unrolling copies 4 data points at a time.
** a second loop below copies for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner */
*pScr1++ = 0;
@ -550,9 +550,9 @@ arm_status arm_conv_partial_fast_opt_q15(
/* If the count is not a multiple of 4, copy remaining samples here.
** No loop unrolling is used. */
k = (srcBLen - 1u) % 0x4u;
k = (srcBLen - 1U) % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* copy second buffer in reversal manner for remaining samples */
*pScr1++ = 0;
@ -591,14 +591,14 @@ arm_status arm_conv_partial_fast_opt_q15(
x20 = *pScr1++;
x21 = *pScr1++;
tapCnt = (srcBLen) >> 2u;
tapCnt = (srcBLen) >> 2U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Read two samples from smaller buffer */
y10 = *pIn2;
y11 = *(pIn2 + 1u);
y11 = *(pIn2 + 1U);
/* multiply and accumlate */
acc0 += (q31_t) x10 *y10;
@ -612,15 +612,15 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Read next two samples from scratch1 buffer */
x10 = *pScr1;
x11 = *(pScr1 + 1u);
x11 = *(pScr1 + 1U);
/* multiply and accumlate */
acc3 += (q31_t) x21 *y10;
acc3 += (q31_t) x10 *y11;
/* Read next two samples from scratch2 buffer */
y10 = *(pIn2 + 2u);
y11 = *(pIn2 + 3u);
y10 = *(pIn2 + 2U);
y11 = *(pIn2 + 3U);
/* multiply and accumlate */
acc0 += (q31_t) x20 *y10;
@ -639,20 +639,20 @@ arm_status arm_conv_partial_fast_opt_q15(
acc3 += (q31_t) x20 *y11;
/* update scratch pointers */
pIn2 += 4u;
pScr1 += 4u;
pIn2 += 4U;
pScr1 += 4U;
/* Decrement the loop counter */
tapCnt--;
}
/* Update scratch pointer for remaining samples of smaller length sequence */
pScr1 -= 4u;
pScr1 -= 4U;
/* apply same above for remaining samples of smaller length sequence */
tapCnt = (srcBLen) & 3u;
tapCnt = (srcBLen) & 3U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* accumlate the results */
acc0 += (*pScr1++ * *pIn2);
@ -660,7 +660,7 @@ arm_status arm_conv_partial_fast_opt_q15(
acc2 += (*pScr1++ * *pIn2);
acc3 += (*pScr1++ * *pIn2++);
pScr1 -= 3u;
pScr1 -= 3U;
/* Decrement the loop counter */
tapCnt--;
@ -678,7 +678,7 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Initialization of inputB pointer */
pIn2 = py;
pScratch1 += 4u;
pScratch1 += 4U;
}
@ -694,9 +694,9 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Clear Accumlators */
acc0 = 0;
tapCnt = (srcBLen) >> 1u;
tapCnt = (srcBLen) >> 1U;
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* Read next two samples from scratch1 buffer */
@ -715,10 +715,10 @@ arm_status arm_conv_partial_fast_opt_q15(
tapCnt--;
}
tapCnt = (srcBLen) & 1u;
tapCnt = (srcBLen) & 1U;
/* apply same above for remaining samples of smaller length sequence */
while (tapCnt > 0u)
while (tapCnt > 0U)
{
/* accumlate the results */
@ -736,7 +736,7 @@ arm_status arm_conv_partial_fast_opt_q15(
/* Initialization of inputB pointer */
pIn2 = py;
pScratch1 += 1u;
pScratch1 += 1U;
}

@ -76,7 +76,7 @@ arm_status arm_conv_partial_fast_q15(
arm_status status; /* status of Partial convolution */
/* Check for range of output samples to be calculated */
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
{
/* Set status as ARM_MATH_ARGUMENT_ERROR */
status = ARM_MATH_ARGUMENT_ERROR;
@ -115,7 +115,7 @@ arm_status arm_conv_partial_fast_q15(
blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
blockSize1 = (((int32_t) srcBLen - 1) - (int32_t) firstIndex);
blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :
(int32_t) numPoints) : 0;
blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) +
(int32_t) firstIndex);
@ -148,7 +148,7 @@ arm_status arm_conv_partial_fast_q15(
The count variable holds the number of MAC operations performed.
Since the partial convolution starts from firstIndex
Number of Macs to be performed is firstIndex + 1 */
count = 1u + firstIndex;
count = 1U + firstIndex;
/* Working pointer of inputA */
px = pIn1;
@ -166,7 +166,7 @@ arm_status arm_conv_partial_fast_q15(
/* Second part of this stage computes the MAC operations greater than or equal to 4 */
/* The first part of the stage starts here */
while ((count < 4u) && (blockSize1 > 0))
while ((count < 4U) && (blockSize1 > 0))
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -175,7 +175,7 @@ arm_status arm_conv_partial_fast_q15(
* inputA samples and inputB samples */
k = count;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum = __SMLAD(*px++, *py--, sum);
@ -210,11 +210,11 @@ arm_status arm_conv_partial_fast_q15(
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count >> 2u;
k = count >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
/* x[0], x[1] are multiplied with y[srcBLen - 1], y[srcBLen - 2] respectively */
@ -228,13 +228,13 @@ arm_status arm_conv_partial_fast_q15(
/* For the next MAC operations, the pointer py is used without SIMD
* So, py is incremented by 1 */
py = py + 1u;
py = py + 1U;
/* If the count is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = count % 0x4u;
k = count % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum = __SMLAD(*px++, *py--, sum);
@ -247,7 +247,7 @@ arm_status arm_conv_partial_fast_q15(
*pOut++ = (q15_t) (sum >> 15);
/* Update the inputA and inputB pointers for next MAC calculation */
py = ++pSrc2 - 1u;
py = ++pSrc2 - 1U;
px = pIn1;
/* Increment the MAC count */
@ -278,11 +278,11 @@ arm_status arm_conv_partial_fast_q15(
}
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pSrc2 = pIn2 + (srcBLen - 1U);
py = pSrc2;
/* count is the index by which the pointer pIn1 to be incremented */
count = 0u;
count = 0U;
/* --------------------
@ -292,14 +292,14 @@ arm_status arm_conv_partial_fast_q15(
/* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
* So, to loop unroll over blockSize2,
* srcBLen should be greater than or equal to 4 */
if (srcBLen >= 4u)
if (srcBLen >= 4U)
{
/* Loop unroll over blockSize2, by 4 */
blkCnt = ((uint32_t) blockSize2 >> 2u);
blkCnt = ((uint32_t) blockSize2 >> 2U);
while (blkCnt > 0u)
while (blkCnt > 0U)
{
py = py - 1u;
py = py - 1U;
/* Set all accumulators to zero */
acc0 = 0;
@ -312,11 +312,11 @@ arm_status arm_conv_partial_fast_q15(
x0 = *__SIMD32(px);
/* read x[1], x[2] samples */
x1 = _SIMD32_OFFSET(px+1);
px+= 2u;
px+= 2U;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -358,7 +358,7 @@ arm_status arm_conv_partial_fast_q15(
/* Read x[5], x[6] */
x1 = _SIMD32_OFFSET(px+3);
px += 4u;
px += 4U;
/* acc2 += x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
acc2 = __SMLADX(x0, c0, acc2);
@ -373,15 +373,15 @@ arm_status arm_conv_partial_fast_q15(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
if (k == 1u)
if (k == 1U)
{
/* Read y[srcBLen - 5] */
c0 = *(py+1);
#ifdef ARM_MATH_BIG_ENDIAN
c0 = c0 << 16u;
c0 = c0 << 16U;
#else
@ -400,7 +400,7 @@ arm_status arm_conv_partial_fast_q15(
acc3 = __SMLADX(x3, c0, acc3);
}
if (k == 2u)
if (k == 2U)
{
/* Read y[srcBLen - 5], y[srcBLen - 6] */
c0 = _SIMD32_OFFSET(py);
@ -410,7 +410,7 @@ arm_status arm_conv_partial_fast_q15(
/* Read x[9] */
x2 = _SIMD32_OFFSET(px+1);
px += 2u;
px += 2U;
/* Perform the multiply-accumulates */
acc0 = __SMLADX(x0, c0, acc0);
@ -419,7 +419,7 @@ arm_status arm_conv_partial_fast_q15(
acc3 = __SMLADX(x2, c0, acc3);
}
if (k == 3u)
if (k == 3U)
{
/* Read y[srcBLen - 5], y[srcBLen - 6] */
c0 = _SIMD32_OFFSET(py);
@ -439,7 +439,7 @@ arm_status arm_conv_partial_fast_q15(
c0 = *(py-1);
#ifdef ARM_MATH_BIG_ENDIAN
c0 = c0 << 16u;
c0 = c0 << 16U;
#else
c0 = c0 & 0x0000FFFF;
@ -447,7 +447,7 @@ arm_status arm_conv_partial_fast_q15(
/* Read x[10] */
x3 = _SIMD32_OFFSET(px+2);
px += 3u;
px += 3U;
/* Perform the multiply-accumulates */
acc0 = __SMLADX(x1, c0, acc0);
@ -470,7 +470,7 @@ arm_status arm_conv_partial_fast_q15(
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Increment the pointer pIn1 index, count by 4 */
count += 4u;
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
@ -482,19 +482,19 @@ arm_status arm_conv_partial_fast_q15(
/* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = (uint32_t) blockSize2 % 0x4u;
blkCnt = (uint32_t) blockSize2 % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -508,9 +508,9 @@ arm_status arm_conv_partial_fast_q15(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -539,7 +539,7 @@ arm_status arm_conv_partial_fast_q15(
* the blockSize2 loop cannot be unrolled by 4 */
blkCnt = (uint32_t) blockSize2;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -547,7 +547,7 @@ arm_status arm_conv_partial_fast_q15(
/* srcBLen number of MACS should be performed */
k = srcBLen;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum += ((q31_t) * px++ * *py--);
@ -585,15 +585,15 @@ arm_status arm_conv_partial_fast_q15(
/* In this stage the MAC operations are decreased by 1 for every iteration.
The count variable holds the number of MAC operations performed */
count = srcBLen - 1u;
count = srcBLen - 1U;
/* Working pointer of inputA */
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pIn2 = pSrc2 - 1u;
pSrc2 = pIn2 + (srcBLen - 1U);
pIn2 = pSrc2 - 1U;
py = pIn2;
/* -------------------
@ -605,19 +605,19 @@ arm_status arm_conv_partial_fast_q15(
/* Second part of this stage computes the MAC operations less than or equal to 4 */
/* The first part of the stage starts here */
j = count >> 2u;
j = count >> 2U;
while ((j > 0u) && (blockSize3 > 0))
while ((j > 0U) && (blockSize3 > 0))
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count >> 2u;
k = count >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* x[srcALen - srcBLen + 1], x[srcALen - srcBLen + 2] are multiplied
* with y[srcBLen - 1], y[srcBLen - 2] respectively */
@ -632,13 +632,13 @@ arm_status arm_conv_partial_fast_q15(
/* For the next MAC operations, the pointer py is used without SIMD
* So, py is incremented by 1 */
py = py + 1u;
py = py + 1U;
/* If the count is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = count % 0x4u;
k = count % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* sum += x[srcALen - srcBLen + 5] * y[srcBLen - 5] */
sum = __SMLAD(*px++, *py--, sum);
@ -666,7 +666,7 @@ arm_status arm_conv_partial_fast_q15(
/* The second part of the stage starts here */
/* SIMD is not used for the next MAC operations,
* so pointer py is updated to read only one sample at a time */
py = py + 1u;
py = py + 1U;
while (blockSize3 > 0)
{
@ -676,7 +676,7 @@ arm_status arm_conv_partial_fast_q15(
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
/* sum += x[srcALen-1] * y[srcBLen-1] */
@ -723,7 +723,7 @@ arm_status arm_conv_partial_fast_q15(
q15_t a, b;
/* Check for range of output samples to be calculated */
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
{
/* Set status as ARM_MATH_ARGUMENT_ERROR */
status = ARM_MATH_ARGUMENT_ERROR;
@ -762,7 +762,7 @@ arm_status arm_conv_partial_fast_q15(
blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :
(int32_t) numPoints) : 0;
blockSize2 = ((int32_t) check - blockSize3) -
(blockSize1 + (int32_t) firstIndex);
@ -795,7 +795,7 @@ arm_status arm_conv_partial_fast_q15(
The count variable holds the number of MAC operations performed.
Since the partial convolution starts from firstIndex
Number of Macs to be performed is firstIndex + 1 */
count = 1u + firstIndex;
count = 1U + firstIndex;
/* Working pointer of inputA */
px = pIn1;
@ -813,7 +813,7 @@ arm_status arm_conv_partial_fast_q15(
/* Second part of this stage computes the MAC operations greater than or equal to 4 */
/* The first part of the stage starts here */
while ((count < 4u) && (blockSize1 > 0))
while ((count < 4U) && (blockSize1 > 0))
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -822,7 +822,7 @@ arm_status arm_conv_partial_fast_q15(
* inputA samples and inputB samples */
k = count;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -857,13 +857,13 @@ arm_status arm_conv_partial_fast_q15(
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count >> 2u;
k = count >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
py++;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -877,9 +877,9 @@ arm_status arm_conv_partial_fast_q15(
/* If the count is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = count % 0x4u;
k = count % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -892,7 +892,7 @@ arm_status arm_conv_partial_fast_q15(
*pOut++ = (q15_t) (sum >> 15);
/* Update the inputA and inputB pointers for next MAC calculation */
py = ++pSrc2 - 1u;
py = ++pSrc2 - 1U;
px = pIn1;
/* Increment the MAC count */
@ -923,11 +923,11 @@ arm_status arm_conv_partial_fast_q15(
}
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pSrc2 = pIn2 + (srcBLen - 1U);
py = pSrc2;
/* count is the index by which the pointer pIn1 to be incremented */
count = 0u;
count = 0U;
/* --------------------
@ -937,14 +937,14 @@ arm_status arm_conv_partial_fast_q15(
/* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
* So, to loop unroll over blockSize2,
* srcBLen should be greater than or equal to 4 */
if (srcBLen >= 4u)
if (srcBLen >= 4U)
{
/* Loop unroll over blockSize2, by 4 */
blkCnt = ((uint32_t) blockSize2 >> 2u);
blkCnt = ((uint32_t) blockSize2 >> 2U);
while (blkCnt > 0u)
while (blkCnt > 0U)
{
py = py - 1u;
py = py - 1U;
/* Set all accumulators to zero */
acc0 = 0;
@ -971,7 +971,7 @@ arm_status arm_conv_partial_fast_q15(
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -1061,7 +1061,7 @@ arm_status arm_conv_partial_fast_q15(
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
px += 4u;
px += 4U;
/* acc2 += x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
acc2 = __SMLADX(x0, c0, acc2);
@ -1076,16 +1076,16 @@ arm_status arm_conv_partial_fast_q15(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
if (k == 1u)
if (k == 1U)
{
/* Read y[srcBLen - 5] */
c0 = *(py+1);
#ifdef ARM_MATH_BIG_ENDIAN
c0 = c0 << 16u;
c0 = c0 << 16U;
#else
@ -1116,7 +1116,7 @@ arm_status arm_conv_partial_fast_q15(
acc3 = __SMLADX(x3, c0, acc3);
}
if (k == 2u)
if (k == 2U)
{
/* Read y[srcBLen - 5], y[srcBLen - 6] */
a = *py;
@ -1149,7 +1149,7 @@ arm_status arm_conv_partial_fast_q15(
x2 = __PKHBT(a, b, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
px += 2u;
px += 2U;
/* Perform the multiply-accumulates */
acc0 = __SMLADX(x0, c0, acc0);
@ -1158,7 +1158,7 @@ arm_status arm_conv_partial_fast_q15(
acc3 = __SMLADX(x2, c0, acc3);
}
if (k == 3u)
if (k == 3U)
{
/* Read y[srcBLen - 5], y[srcBLen - 6] */
a = *py;
@ -1202,7 +1202,7 @@ arm_status arm_conv_partial_fast_q15(
c0 = *(py-1);
#ifdef ARM_MATH_BIG_ENDIAN
c0 = c0 << 16u;
c0 = c0 << 16U;
#else
c0 = c0 & 0x0000FFFF;
@ -1222,7 +1222,7 @@ arm_status arm_conv_partial_fast_q15(
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
px += 3u;
px += 3U;
/* Perform the multiply-accumulates */
acc0 = __SMLADX(x1, c0, acc0);
@ -1238,7 +1238,7 @@ arm_status arm_conv_partial_fast_q15(
*pOut++ = (q15_t)(acc3 >> 15);
/* Increment the pointer pIn1 index, count by 4 */
count += 4u;
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
@ -1250,19 +1250,19 @@ arm_status arm_conv_partial_fast_q15(
/* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = (uint32_t) blockSize2 % 0x4u;
blkCnt = (uint32_t) blockSize2 % 0x4U;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2u;
k = srcBLen >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -1276,9 +1276,9 @@ arm_status arm_conv_partial_fast_q15(
/* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = srcBLen % 0x4u;
k = srcBLen % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -1307,7 +1307,7 @@ arm_status arm_conv_partial_fast_q15(
* the blockSize2 loop cannot be unrolled by 4 */
blkCnt = (uint32_t) blockSize2;
while (blkCnt > 0u)
while (blkCnt > 0U)
{
/* Accumulator is made zero for every iteration */
sum = 0;
@ -1315,7 +1315,7 @@ arm_status arm_conv_partial_fast_q15(
/* srcBLen number of MACS should be performed */
k = srcBLen;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulate */
sum += ((q31_t) * px++ * *py--);
@ -1353,15 +1353,15 @@ arm_status arm_conv_partial_fast_q15(
/* In this stage the MAC operations are decreased by 1 for every iteration.
The count variable holds the number of MAC operations performed */
count = srcBLen - 1u;
count = srcBLen - 1U;
/* Working pointer of inputA */
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1u);
pIn2 = pSrc2 - 1u;
pSrc2 = pIn2 + (srcBLen - 1U);
pIn2 = pSrc2 - 1U;
py = pIn2;
/* -------------------
@ -1373,21 +1373,21 @@ arm_status arm_conv_partial_fast_q15(
/* Second part of this stage computes the MAC operations less than or equal to 4 */
/* The first part of the stage starts here */
j = count >> 2u;
j = count >> 2U;
while ((j > 0u) && (blockSize3 > 0))
while ((j > 0U) && (blockSize3 > 0))
{
/* Accumulator is made zero for every iteration */
sum = 0;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count >> 2u;
k = count >> 2U;
/* First part of the processing with loop unrolling. Compute 4 MACs at a time.
** a second loop below computes MACs for the remaining 1 to 3 samples. */
py++;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -1401,9 +1401,9 @@ arm_status arm_conv_partial_fast_q15(
/* If the count is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
k = count % 0x4u;
k = count % 0x4U;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
sum += ((q31_t) * px++ * *py--);
@ -1431,7 +1431,7 @@ arm_status arm_conv_partial_fast_q15(
/* The second part of the stage starts here */
/* SIMD is not used for the next MAC operations,
* so pointer py is updated to read only one sample at a time */
py = py + 1u;
py = py + 1U;
while (blockSize3 > 0)
{
@ -1441,7 +1441,7 @@ arm_status arm_conv_partial_fast_q15(
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = count;
while (k > 0u)
while (k > 0U)
{
/* Perform the multiply-accumulates */
/* sum += x[srcALen-1] * y[srcBLen-1] */

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save