pull/19/head
GuentherMartin 7 years ago
parent 06eff37277
commit da38c27dd0

@ -294,9 +294,9 @@ void arm_conv_f32(
acc3 = 0.0f;
/* read x[0], x[1], x[2] samples */
x0 = *(px++);
x1 = *(px++);
x2 = *(px++);
x0 = *px++;
x1 = *px++;
x2 = *px++;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2U;
@ -306,7 +306,7 @@ void arm_conv_f32(
do
{
/* Read y[srcBLen - 1] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[3] sample */
x3 = *(px);
@ -325,7 +325,7 @@ void arm_conv_f32(
acc3 += x3 * c0;
/* Read y[srcBLen - 2] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[4] sample */
x0 = *(px + 1U);
@ -341,7 +341,7 @@ void arm_conv_f32(
acc3 += x0 * c0;
/* Read y[srcBLen - 3] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[5] sample */
x1 = *(px + 2U);
@ -357,7 +357,7 @@ void arm_conv_f32(
acc3 += x1 * c0;
/* Read y[srcBLen - 4] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[6] sample */
x2 = *(px + 3U);
@ -383,10 +383,10 @@ void arm_conv_f32(
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[7] sample */
x3 = *(px++);
x3 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[4] * y[srcBLen - 5] */

@ -248,9 +248,9 @@ void arm_conv_fast_q31(
acc3 = 0;
/* read x[0], x[1], x[2] samples */
x0 = *(px++);
x1 = *(px++);
x2 = *(px++);
x0 = *px++;
x1 = *px++;
x2 = *px++;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2U;
@ -260,10 +260,10 @@ void arm_conv_fast_q31(
do
{
/* Read y[srcBLen - 1] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[3] sample */
x3 = *(px++);
x3 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[0] * y[srcBLen - 1] */
@ -279,10 +279,10 @@ void arm_conv_fast_q31(
acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32);
/* Read y[srcBLen - 2] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[4] sample */
x0 = *(px++);
x0 = *px++;
/* Perform the multiply-accumulate */
/* acc0 += x[1] * y[srcBLen - 2] */
@ -295,10 +295,10 @@ void arm_conv_fast_q31(
acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32);
/* Read y[srcBLen - 3] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[5] sample */
x1 = *(px++);
x1 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[2] * y[srcBLen - 3] */
@ -311,10 +311,10 @@ void arm_conv_fast_q31(
acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32);
/* Read y[srcBLen - 4] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[6] sample */
x2 = *(px++);
x2 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[3] * y[srcBLen - 4] */
@ -336,10 +336,10 @@ void arm_conv_fast_q31(
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[7] sample */
x3 = *(px++);
x3 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[4] * y[srcBLen - 5] */

@ -263,12 +263,13 @@ arm_status arm_conv_partial_f32(
/* Working pointer of inputA */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1;
pSrc1 = pIn1 + firstIndex - srcBLen + 1;
}
else
{
px = pIn1;
pSrc1 = pIn1;
}
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1U);
@ -298,9 +299,9 @@ arm_status arm_conv_partial_f32(
acc3 = 0.0f;
/* read x[0], x[1], x[2] samples */
x0 = *(px++);
x1 = *(px++);
x2 = *(px++);
x0 = *px++;
x1 = *px++;
x2 = *px++;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2U;
@ -310,10 +311,10 @@ arm_status arm_conv_partial_f32(
do
{
/* Read y[srcBLen - 1] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[3] sample */
x3 = *(px++);
x3 = *px++;
/* Perform the multiply-accumulate */
/* acc0 += x[0] * y[srcBLen - 1] */
@ -329,10 +330,10 @@ arm_status arm_conv_partial_f32(
acc3 += x3 * c0;
/* Read y[srcBLen - 2] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[4] sample */
x0 = *(px++);
x0 = *px++;
/* Perform the multiply-accumulate */
/* acc0 += x[1] * y[srcBLen - 2] */
@ -345,10 +346,10 @@ arm_status arm_conv_partial_f32(
acc3 += x0 * c0;
/* Read y[srcBLen - 3] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[5] sample */
x1 = *(px++);
x1 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[2] * y[srcBLen - 3] */
@ -361,10 +362,10 @@ arm_status arm_conv_partial_f32(
acc3 += x1 * c0;
/* Read y[srcBLen - 4] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[6] sample */
x2 = *(px++);
x2 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[3] * y[srcBLen - 4] */
@ -386,10 +387,10 @@ arm_status arm_conv_partial_f32(
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[7] sample */
x3 = *(px++);
x3 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[4] * y[srcBLen - 5] */
@ -420,14 +421,7 @@ arm_status arm_conv_partial_f32(
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -480,14 +474,7 @@ arm_status arm_conv_partial_f32(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -524,14 +511,7 @@ arm_status arm_conv_partial_f32(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */

@ -270,12 +270,13 @@ arm_status arm_conv_partial_fast_q15(
/* Working pointer of inputA */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1;
pSrc1 = pIn1 + firstIndex - srcBLen + 1;
}
else
{
px = pIn1;
pSrc1 = pIn1;
}
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1U);
@ -473,7 +474,7 @@ arm_status arm_conv_partial_fast_q15(
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -526,14 +527,7 @@ arm_status arm_conv_partial_fast_q15(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -570,14 +564,7 @@ arm_status arm_conv_partial_fast_q15(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -929,12 +916,13 @@ arm_status arm_conv_partial_fast_q15(
/* Working pointer of inputA */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1;
pSrc1 = pIn1 + firstIndex - srcBLen + 1;
}
else
{
px = pIn1;
pSrc1 = pIn1;
}
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1U);
@ -1255,7 +1243,7 @@ arm_status arm_conv_partial_fast_q15(
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -1308,7 +1296,7 @@ arm_status arm_conv_partial_fast_q15(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -1345,7 +1333,7 @@ arm_status arm_conv_partial_fast_q15(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
px = pIn1 + count;
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */

@ -234,12 +234,13 @@ arm_status arm_conv_partial_fast_q31(
/* Working pointer of inputA */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1;
pSrc1 = pIn1 + firstIndex - srcBLen + 1;
}
else
{
px = pIn1;
pSrc1 = pIn1;
}
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1U);
@ -269,9 +270,9 @@ arm_status arm_conv_partial_fast_q31(
acc3 = 0;
/* read x[0], x[1], x[2] samples */
x0 = *(px++);
x1 = *(px++);
x2 = *(px++);
x0 = *px++;
x1 = *px++;
x2 = *px++;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2U;
@ -281,10 +282,10 @@ arm_status arm_conv_partial_fast_q31(
do
{
/* Read y[srcBLen - 1] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[3] sample */
x3 = *(px++);
x3 = *px++;
/* Perform the multiply-accumulate */
/* acc0 += x[0] * y[srcBLen - 1] */
@ -300,10 +301,10 @@ arm_status arm_conv_partial_fast_q31(
acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32);
/* Read y[srcBLen - 2] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[4] sample */
x0 = *(px++);
x0 = *px++;
/* Perform the multiply-accumulate */
/* acc0 += x[1] * y[srcBLen - 2] */
@ -316,10 +317,10 @@ arm_status arm_conv_partial_fast_q31(
acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32);
/* Read y[srcBLen - 3] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[5] sample */
x1 = *(px++);
x1 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[2] * y[srcBLen - 3] */
@ -332,10 +333,10 @@ arm_status arm_conv_partial_fast_q31(
acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32);
/* Read y[srcBLen - 4] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[6] sample */
x2 = *(px++);
x2 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[3] * y[srcBLen - 4] */
@ -357,10 +358,10 @@ arm_status arm_conv_partial_fast_q31(
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[7] sample */
x3 = *(px++);
x3 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[4] * y[srcBLen - 5] */
@ -391,14 +392,7 @@ arm_status arm_conv_partial_fast_q31(
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -456,14 +450,7 @@ arm_status arm_conv_partial_fast_q31(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -501,14 +488,7 @@ arm_status arm_conv_partial_fast_q31(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */

@ -277,12 +277,13 @@ arm_status arm_conv_partial_q15(
/* Working pointer of inputA */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1;
pSrc1 = pIn1 + firstIndex - srcBLen + 1;
}
else
{
px = pIn1;
pSrc1 = pIn1;
}
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1U);
@ -488,14 +489,7 @@ arm_status arm_conv_partial_q15(
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -548,14 +542,7 @@ arm_status arm_conv_partial_q15(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -592,14 +579,7 @@ arm_status arm_conv_partial_q15(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */

@ -231,12 +231,13 @@ arm_status arm_conv_partial_q31(
/* Working pointer of inputA */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1;
pSrc1 = pIn1 + firstIndex - srcBLen + 1;
}
else
{
px = pIn1;
pSrc1 = pIn1;
}
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1U);
@ -265,8 +266,8 @@ arm_status arm_conv_partial_q31(
acc2 = 0;
/* read x[0], x[1] samples */
x0 = *(px++);
x1 = *(px++);
x0 = *px++;
x1 = *px++;
/* Apply loop unrolling and compute 3 MACs simultaneously. */
k = srcBLen / 3;
@ -331,10 +332,10 @@ arm_status arm_conv_partial_q31(
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[7] sample */
x2 = *(px++);
x2 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[4] * y[srcBLen - 5] */
@ -361,14 +362,7 @@ arm_status arm_conv_partial_q31(
count += 3U;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -421,14 +415,7 @@ arm_status arm_conv_partial_q31(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -465,14 +452,7 @@ arm_status arm_conv_partial_q31(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */

@ -254,12 +254,13 @@ arm_status arm_conv_partial_q7(
/* Working pointer of inputA */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1;
pSrc1 = pIn1 + firstIndex - srcBLen + 1;
}
else
{
px = pIn1;
pSrc1 = pIn1;
}
px = pSrc1;
/* Working pointer of inputB */
pSrc2 = pIn2 + (srcBLen - 1U);
@ -289,9 +290,9 @@ arm_status arm_conv_partial_q7(
acc3 = 0;
/* read x[0], x[1], x[2] samples */
x0 = *(px++);
x1 = *(px++);
x2 = *(px++);
x0 = *px++;
x1 = *px++;
x2 = *px++;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2U;
@ -301,12 +302,12 @@ arm_status arm_conv_partial_q7(
do
{
/* Read y[srcBLen - 1] sample */
c0 = *(py--);
c0 = *py--;
/* Read y[srcBLen - 2] sample */
c1 = *(py--);
c1 = *py--;
/* Read x[3] sample */
x3 = *(px++);
x3 = *px++;
/* x[0] and x[1] are packed */
in1 = (q15_t) x0;
@ -342,7 +343,7 @@ arm_status arm_conv_partial_q7(
acc2 = __SMLAD(input1, input2, acc2);
/* Read x[4] sample */
x0 = *(px++);
x0 = *px++;
/* x[3] and x[4] are packed */
in1 = (q15_t) x3;
@ -354,12 +355,12 @@ arm_status arm_conv_partial_q7(
acc3 = __SMLAD(input1, input2, acc3);
/* Read y[srcBLen - 3] sample */
c0 = *(py--);
c0 = *py--;
/* Read y[srcBLen - 4] sample */
c1 = *(py--);
c1 = *py--;
/* Read x[5] sample */
x1 = *(px++);
x1 = *px++;
/* x[2] and x[3] are packed */
in1 = (q15_t) x2;
@ -395,7 +396,7 @@ arm_status arm_conv_partial_q7(
acc2 = __SMLAD(input1, input2, acc2);
/* Read x[6] sample */
x2 = *(px++);
x2 = *px++;
/* x[5] and x[6] are packed */
in1 = (q15_t) x1;
@ -415,10 +416,10 @@ arm_status arm_conv_partial_q7(
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[7] sample */
x3 = *(px++);
x3 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[4] * y[srcBLen - 5] */
@ -449,14 +450,7 @@ arm_status arm_conv_partial_q7(
count += 4U;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
@ -531,14 +525,7 @@ arm_status arm_conv_partial_q7(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */
@ -575,14 +562,7 @@ arm_status arm_conv_partial_q7(
count++;
/* Update the inputA and inputB pointers for next MAC calculation */
if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
{
px = pIn1 + firstIndex - srcBLen + 1 + count;
}
else
{
px = pIn1 + count;
}
px = pSrc1 + count;
py = pSrc2;
/* Decrement the loop counter */

@ -245,8 +245,8 @@ void arm_conv_q31(
acc2 = 0;
/* read x[0], x[1] samples */
x0 = *(px++);
x1 = *(px++);
x0 = *px++;
x1 = *px++;
/* Apply loop unrolling and compute 3 MACs simultaneously. */
k = srcBLen / 3;
@ -310,10 +310,10 @@ void arm_conv_q31(
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[7] sample */
x2 = *(px++);
x2 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[4] * y[srcBLen - 5] */

@ -264,9 +264,9 @@ void arm_conv_q7(
acc3 = 0;
/* read x[0], x[1], x[2] samples */
x0 = *(px++);
x1 = *(px++);
x2 = *(px++);
x0 = *px++;
x1 = *px++;
x2 = *px++;
/* Apply loop unrolling and compute 4 MACs simultaneously. */
k = srcBLen >> 2U;
@ -276,12 +276,12 @@ void arm_conv_q7(
do
{
/* Read y[srcBLen - 1] sample */
c0 = *(py--);
c0 = *py--;
/* Read y[srcBLen - 2] sample */
c1 = *(py--);
c1 = *py--;
/* Read x[3] sample */
x3 = *(px++);
x3 = *px++;
/* x[0] and x[1] are packed */
in1 = (q15_t) x0;
@ -317,7 +317,7 @@ void arm_conv_q7(
acc2 = __SMLAD(input1, input2, acc2);
/* Read x[4] sample */
x0 = *(px++);
x0 = *px++;
/* x[3] and x[4] are packed */
in1 = (q15_t) x3;
@ -329,12 +329,12 @@ void arm_conv_q7(
acc3 = __SMLAD(input1, input2, acc3);
/* Read y[srcBLen - 3] sample */
c0 = *(py--);
c0 = *py--;
/* Read y[srcBLen - 4] sample */
c1 = *(py--);
c1 = *py--;
/* Read x[5] sample */
x1 = *(px++);
x1 = *px++;
/* x[2] and x[3] are packed */
in1 = (q15_t) x2;
@ -370,7 +370,7 @@ void arm_conv_q7(
acc2 = __SMLAD(input1, input2, acc2);
/* Read x[6] sample */
x2 = *(px++);
x2 = *px++;
/* x[5] and x[6] are packed */
in1 = (q15_t) x1;
@ -390,10 +390,10 @@ void arm_conv_q7(
while (k > 0U)
{
/* Read y[srcBLen - 5] sample */
c0 = *(py--);
c0 = *py--;
/* Read x[7] sample */
x3 = *(px++);
x3 = *px++;
/* Perform the multiply-accumulates */
/* acc0 += x[4] * y[srcBLen - 5] */

Loading…
Cancel
Save