From da38c27dd035b601dfe78f63eb5be48096b375e7 Mon Sep 17 00:00:00 2001 From: GuentherMartin Date: Thu, 6 Dec 2018 14:56:42 +0100 Subject: [PATCH] Merged PR #166 --- Source/FilteringFunctions/arm_conv_f32.c | 18 +++--- Source/FilteringFunctions/arm_conv_fast_q31.c | 26 ++++----- .../FilteringFunctions/arm_conv_partial_f32.c | 58 ++++++------------- .../arm_conv_partial_fast_q15.c | 36 ++++-------- .../arm_conv_partial_fast_q31.c | 58 ++++++------------- .../FilteringFunctions/arm_conv_partial_q15.c | 32 ++-------- .../FilteringFunctions/arm_conv_partial_q31.c | 40 ++++--------- .../FilteringFunctions/arm_conv_partial_q7.c | 58 ++++++------------- Source/FilteringFunctions/arm_conv_q31.c | 8 +-- Source/FilteringFunctions/arm_conv_q7.c | 26 ++++----- 10 files changed, 124 insertions(+), 236 deletions(-) diff --git a/Source/FilteringFunctions/arm_conv_f32.c b/Source/FilteringFunctions/arm_conv_f32.c index 9ce5bf02..5c82326c 100644 --- a/Source/FilteringFunctions/arm_conv_f32.c +++ b/Source/FilteringFunctions/arm_conv_f32.c @@ -294,9 +294,9 @@ void arm_conv_f32( acc3 = 0.0f; /* read x[0], x[1], x[2] samples */ - x0 = *(px++); - x1 = *(px++); - x2 = *(px++); + x0 = *px++; + x1 = *px++; + x2 = *px++; /* Apply loop unrolling and compute 4 MACs simultaneously. */ k = srcBLen >> 2U; @@ -306,7 +306,7 @@ void arm_conv_f32( do { /* Read y[srcBLen - 1] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[3] sample */ x3 = *(px); @@ -325,7 +325,7 @@ void arm_conv_f32( acc3 += x3 * c0; /* Read y[srcBLen - 2] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[4] sample */ x0 = *(px + 1U); @@ -341,7 +341,7 @@ void arm_conv_f32( acc3 += x0 * c0; /* Read y[srcBLen - 3] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[5] sample */ x1 = *(px + 2U); @@ -357,7 +357,7 @@ void arm_conv_f32( acc3 += x1 * c0; /* Read y[srcBLen - 4] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[6] sample */ x2 = *(px + 3U); @@ -383,10 +383,10 @@ void arm_conv_f32( while (k > 0U) { /* Read y[srcBLen - 5] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[7] sample */ - x3 = *(px++); + x3 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[4] * y[srcBLen - 5] */ diff --git a/Source/FilteringFunctions/arm_conv_fast_q31.c b/Source/FilteringFunctions/arm_conv_fast_q31.c index ce3e3347..9a7feaaa 100644 --- a/Source/FilteringFunctions/arm_conv_fast_q31.c +++ b/Source/FilteringFunctions/arm_conv_fast_q31.c @@ -248,9 +248,9 @@ void arm_conv_fast_q31( acc3 = 0; /* read x[0], x[1], x[2] samples */ - x0 = *(px++); - x1 = *(px++); - x2 = *(px++); + x0 = *px++; + x1 = *px++; + x2 = *px++; /* Apply loop unrolling and compute 4 MACs simultaneously. */ k = srcBLen >> 2U; @@ -260,10 +260,10 @@ void arm_conv_fast_q31( do { /* Read y[srcBLen - 1] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[3] sample */ - x3 = *(px++); + x3 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[0] * y[srcBLen - 1] */ @@ -279,10 +279,10 @@ void arm_conv_fast_q31( acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32); /* Read y[srcBLen - 2] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[4] sample */ - x0 = *(px++); + x0 = *px++; /* Perform the multiply-accumulate */ /* acc0 += x[1] * y[srcBLen - 2] */ @@ -295,10 +295,10 @@ void arm_conv_fast_q31( acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32); /* Read y[srcBLen - 3] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[5] sample */ - x1 = *(px++); + x1 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[2] * y[srcBLen - 3] */ @@ -311,10 +311,10 @@ void arm_conv_fast_q31( acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32); /* Read y[srcBLen - 4] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[6] sample */ - x2 = *(px++); + x2 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[3] * y[srcBLen - 4] */ @@ -336,10 +336,10 @@ void arm_conv_fast_q31( while (k > 0U) { /* Read y[srcBLen - 5] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[7] sample */ - x3 = *(px++); + x3 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[4] * y[srcBLen - 5] */ diff --git a/Source/FilteringFunctions/arm_conv_partial_f32.c b/Source/FilteringFunctions/arm_conv_partial_f32.c index f3b15b4a..693cf897 100644 --- a/Source/FilteringFunctions/arm_conv_partial_f32.c +++ b/Source/FilteringFunctions/arm_conv_partial_f32.c @@ -263,12 +263,13 @@ arm_status arm_conv_partial_f32( /* Working pointer of inputA */ if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) { - px = pIn1 + firstIndex - srcBLen + 1; + pSrc1 = pIn1 + firstIndex - srcBLen + 1; } else { - px = pIn1; + pSrc1 = pIn1; } + px = pSrc1; /* Working pointer of inputB */ pSrc2 = pIn2 + (srcBLen - 1U); @@ -298,9 +299,9 @@ arm_status arm_conv_partial_f32( acc3 = 0.0f; /* read x[0], x[1], x[2] samples */ - x0 = *(px++); - x1 = *(px++); - x2 = *(px++); + x0 = *px++; + x1 = *px++; + x2 = *px++; /* Apply loop unrolling and compute 4 MACs simultaneously. */ k = srcBLen >> 2U; @@ -310,10 +311,10 @@ arm_status arm_conv_partial_f32( do { /* Read y[srcBLen - 1] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[3] sample */ - x3 = *(px++); + x3 = *px++; /* Perform the multiply-accumulate */ /* acc0 += x[0] * y[srcBLen - 1] */ @@ -329,10 +330,10 @@ arm_status arm_conv_partial_f32( acc3 += x3 * c0; /* Read y[srcBLen - 2] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[4] sample */ - x0 = *(px++); + x0 = *px++; /* Perform the multiply-accumulate */ /* acc0 += x[1] * y[srcBLen - 2] */ @@ -345,10 +346,10 @@ arm_status arm_conv_partial_f32( acc3 += x0 * c0; /* Read y[srcBLen - 3] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[5] sample */ - x1 = *(px++); + x1 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[2] * y[srcBLen - 3] */ @@ -361,10 +362,10 @@ arm_status arm_conv_partial_f32( acc3 += x1 * c0; /* Read y[srcBLen - 4] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[6] sample */ - x2 = *(px++); + x2 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[3] * y[srcBLen - 4] */ @@ -386,10 +387,10 @@ arm_status arm_conv_partial_f32( while (k > 0U) { /* Read y[srcBLen - 5] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[7] sample */ - x3 = *(px++); + x3 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[4] * y[srcBLen - 5] */ @@ -420,14 +421,7 @@ arm_status arm_conv_partial_f32( count += 4U; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -480,14 +474,7 @@ arm_status arm_conv_partial_f32( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -524,14 +511,7 @@ arm_status arm_conv_partial_f32( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ diff --git a/Source/FilteringFunctions/arm_conv_partial_fast_q15.c b/Source/FilteringFunctions/arm_conv_partial_fast_q15.c index bd43a984..50972579 100644 --- a/Source/FilteringFunctions/arm_conv_partial_fast_q15.c +++ b/Source/FilteringFunctions/arm_conv_partial_fast_q15.c @@ -270,12 +270,13 @@ arm_status arm_conv_partial_fast_q15( /* Working pointer of inputA */ if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) { - px = pIn1 + firstIndex - srcBLen + 1; + pSrc1 = pIn1 + firstIndex - srcBLen + 1; } else { - px = pIn1; + pSrc1 = pIn1; } + px = pSrc1; /* Working pointer of inputB */ pSrc2 = pIn2 + (srcBLen - 1U); @@ -473,7 +474,7 @@ arm_status arm_conv_partial_fast_q15( count += 4U; /* Update the inputA and inputB pointers for next MAC calculation */ - px = pIn1 + count; + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -526,14 +527,7 @@ arm_status arm_conv_partial_fast_q15( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -570,14 +564,7 @@ arm_status arm_conv_partial_fast_q15( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -929,12 +916,13 @@ arm_status arm_conv_partial_fast_q15( /* Working pointer of inputA */ if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) { - px = pIn1 + firstIndex - srcBLen + 1; + pSrc1 = pIn1 + firstIndex - srcBLen + 1; } else { - px = pIn1; + pSrc1 = pIn1; } + px = pSrc1; /* Working pointer of inputB */ pSrc2 = pIn2 + (srcBLen - 1U); @@ -1255,7 +1243,7 @@ arm_status arm_conv_partial_fast_q15( count += 4U; /* Update the inputA and inputB pointers for next MAC calculation */ - px = pIn1 + count; + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -1308,7 +1296,7 @@ arm_status arm_conv_partial_fast_q15( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - px = pIn1 + count; + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -1345,7 +1333,7 @@ arm_status arm_conv_partial_fast_q15( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - px = pIn1 + count; + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ diff --git a/Source/FilteringFunctions/arm_conv_partial_fast_q31.c b/Source/FilteringFunctions/arm_conv_partial_fast_q31.c index af3724dc..84872763 100644 --- a/Source/FilteringFunctions/arm_conv_partial_fast_q31.c +++ b/Source/FilteringFunctions/arm_conv_partial_fast_q31.c @@ -234,12 +234,13 @@ arm_status arm_conv_partial_fast_q31( /* Working pointer of inputA */ if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) { - px = pIn1 + firstIndex - srcBLen + 1; + pSrc1 = pIn1 + firstIndex - srcBLen + 1; } else { - px = pIn1; + pSrc1 = pIn1; } + px = pSrc1; /* Working pointer of inputB */ pSrc2 = pIn2 + (srcBLen - 1U); @@ -269,9 +270,9 @@ arm_status arm_conv_partial_fast_q31( acc3 = 0; /* read x[0], x[1], x[2] samples */ - x0 = *(px++); - x1 = *(px++); - x2 = *(px++); + x0 = *px++; + x1 = *px++; + x2 = *px++; /* Apply loop unrolling and compute 4 MACs simultaneously. */ k = srcBLen >> 2U; @@ -281,10 +282,10 @@ arm_status arm_conv_partial_fast_q31( do { /* Read y[srcBLen - 1] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[3] sample */ - x3 = *(px++); + x3 = *px++; /* Perform the multiply-accumulate */ /* acc0 += x[0] * y[srcBLen - 1] */ @@ -300,10 +301,10 @@ arm_status arm_conv_partial_fast_q31( acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32); /* Read y[srcBLen - 2] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[4] sample */ - x0 = *(px++); + x0 = *px++; /* Perform the multiply-accumulate */ /* acc0 += x[1] * y[srcBLen - 2] */ @@ -316,10 +317,10 @@ arm_status arm_conv_partial_fast_q31( acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32); /* Read y[srcBLen - 3] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[5] sample */ - x1 = *(px++); + x1 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[2] * y[srcBLen - 3] */ @@ -332,10 +333,10 @@ arm_status arm_conv_partial_fast_q31( acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32); /* Read y[srcBLen - 4] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[6] sample */ - x2 = *(px++); + x2 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[3] * y[srcBLen - 4] */ @@ -357,10 +358,10 @@ arm_status arm_conv_partial_fast_q31( while (k > 0U) { /* Read y[srcBLen - 5] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[7] sample */ - x3 = *(px++); + x3 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[4] * y[srcBLen - 5] */ @@ -391,14 +392,7 @@ arm_status arm_conv_partial_fast_q31( count += 4U; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -456,14 +450,7 @@ arm_status arm_conv_partial_fast_q31( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -501,14 +488,7 @@ arm_status arm_conv_partial_fast_q31( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ diff --git a/Source/FilteringFunctions/arm_conv_partial_q15.c b/Source/FilteringFunctions/arm_conv_partial_q15.c index 93864b78..6a5bd51d 100644 --- a/Source/FilteringFunctions/arm_conv_partial_q15.c +++ b/Source/FilteringFunctions/arm_conv_partial_q15.c @@ -277,12 +277,13 @@ arm_status arm_conv_partial_q15( /* Working pointer of inputA */ if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) { - px = pIn1 + firstIndex - srcBLen + 1; + pSrc1 = pIn1 + firstIndex - srcBLen + 1; } else { - px = pIn1; + pSrc1 = pIn1; } + px = pSrc1; /* Working pointer of inputB */ pSrc2 = pIn2 + (srcBLen - 1U); @@ -488,14 +489,7 @@ arm_status arm_conv_partial_q15( count += 4U; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -548,14 +542,7 @@ arm_status arm_conv_partial_q15( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -592,14 +579,7 @@ arm_status arm_conv_partial_q15( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ diff --git a/Source/FilteringFunctions/arm_conv_partial_q31.c b/Source/FilteringFunctions/arm_conv_partial_q31.c index 94999b93..9f6f315c 100644 --- a/Source/FilteringFunctions/arm_conv_partial_q31.c +++ b/Source/FilteringFunctions/arm_conv_partial_q31.c @@ -231,12 +231,13 @@ arm_status arm_conv_partial_q31( /* Working pointer of inputA */ if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) { - px = pIn1 + firstIndex - srcBLen + 1; + pSrc1 = pIn1 + firstIndex - srcBLen + 1; } else { - px = pIn1; + pSrc1 = pIn1; } + px = pSrc1; /* Working pointer of inputB */ pSrc2 = pIn2 + (srcBLen - 1U); @@ -265,8 +266,8 @@ arm_status arm_conv_partial_q31( acc2 = 0; /* read x[0], x[1] samples */ - x0 = *(px++); - x1 = *(px++); + x0 = *px++; + x1 = *px++; /* Apply loop unrolling and compute 3 MACs simultaneously. */ k = srcBLen / 3; @@ -331,10 +332,10 @@ arm_status arm_conv_partial_q31( while (k > 0U) { /* Read y[srcBLen - 5] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[7] sample */ - x2 = *(px++); + x2 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[4] * y[srcBLen - 5] */ @@ -361,14 +362,7 @@ arm_status arm_conv_partial_q31( count += 3U; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -421,14 +415,7 @@ arm_status arm_conv_partial_q31( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -465,14 +452,7 @@ arm_status arm_conv_partial_q31( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ diff --git a/Source/FilteringFunctions/arm_conv_partial_q7.c b/Source/FilteringFunctions/arm_conv_partial_q7.c index d4e0679d..4d953592 100644 --- a/Source/FilteringFunctions/arm_conv_partial_q7.c +++ b/Source/FilteringFunctions/arm_conv_partial_q7.c @@ -254,12 +254,13 @@ arm_status arm_conv_partial_q7( /* Working pointer of inputA */ if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) { - px = pIn1 + firstIndex - srcBLen + 1; + pSrc1 = pIn1 + firstIndex - srcBLen + 1; } else { - px = pIn1; + pSrc1 = pIn1; } + px = pSrc1; /* Working pointer of inputB */ pSrc2 = pIn2 + (srcBLen - 1U); @@ -289,9 +290,9 @@ arm_status arm_conv_partial_q7( acc3 = 0; /* read x[0], x[1], x[2] samples */ - x0 = *(px++); - x1 = *(px++); - x2 = *(px++); + x0 = *px++; + x1 = *px++; + x2 = *px++; /* Apply loop unrolling and compute 4 MACs simultaneously. */ k = srcBLen >> 2U; @@ -301,12 +302,12 @@ arm_status arm_conv_partial_q7( do { /* Read y[srcBLen - 1] sample */ - c0 = *(py--); + c0 = *py--; /* Read y[srcBLen - 2] sample */ - c1 = *(py--); + c1 = *py--; /* Read x[3] sample */ - x3 = *(px++); + x3 = *px++; /* x[0] and x[1] are packed */ in1 = (q15_t) x0; @@ -342,7 +343,7 @@ arm_status arm_conv_partial_q7( acc2 = __SMLAD(input1, input2, acc2); /* Read x[4] sample */ - x0 = *(px++); + x0 = *px++; /* x[3] and x[4] are packed */ in1 = (q15_t) x3; @@ -354,12 +355,12 @@ arm_status arm_conv_partial_q7( acc3 = __SMLAD(input1, input2, acc3); /* Read y[srcBLen - 3] sample */ - c0 = *(py--); + c0 = *py--; /* Read y[srcBLen - 4] sample */ - c1 = *(py--); + c1 = *py--; /* Read x[5] sample */ - x1 = *(px++); + x1 = *px++; /* x[2] and x[3] are packed */ in1 = (q15_t) x2; @@ -395,7 +396,7 @@ arm_status arm_conv_partial_q7( acc2 = __SMLAD(input1, input2, acc2); /* Read x[6] sample */ - x2 = *(px++); + x2 = *px++; /* x[5] and x[6] are packed */ in1 = (q15_t) x1; @@ -415,10 +416,10 @@ arm_status arm_conv_partial_q7( while (k > 0U) { /* Read y[srcBLen - 5] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[7] sample */ - x3 = *(px++); + x3 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[4] * y[srcBLen - 5] */ @@ -449,14 +450,7 @@ arm_status arm_conv_partial_q7( count += 4U; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; @@ -531,14 +525,7 @@ arm_status arm_conv_partial_q7( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ @@ -575,14 +562,7 @@ arm_status arm_conv_partial_q7( count++; /* Update the inputA and inputB pointers for next MAC calculation */ - if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0) - { - px = pIn1 + firstIndex - srcBLen + 1 + count; - } - else - { - px = pIn1 + count; - } + px = pSrc1 + count; py = pSrc2; /* Decrement the loop counter */ diff --git a/Source/FilteringFunctions/arm_conv_q31.c b/Source/FilteringFunctions/arm_conv_q31.c index 78e50f09..2dda9d41 100644 --- a/Source/FilteringFunctions/arm_conv_q31.c +++ b/Source/FilteringFunctions/arm_conv_q31.c @@ -245,8 +245,8 @@ void arm_conv_q31( acc2 = 0; /* read x[0], x[1], x[2] samples */ - x0 = *(px++); - x1 = *(px++); + x0 = *px++; + x1 = *px++; /* Apply loop unrolling and compute 3 MACs simultaneously. */ k = srcBLen / 3; @@ -310,10 +310,10 @@ void arm_conv_q31( while (k > 0U) { /* Read y[srcBLen - 5] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[7] sample */ - x2 = *(px++); + x2 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[4] * y[srcBLen - 5] */ diff --git a/Source/FilteringFunctions/arm_conv_q7.c b/Source/FilteringFunctions/arm_conv_q7.c index 9e5a79b5..94ac6204 100644 --- a/Source/FilteringFunctions/arm_conv_q7.c +++ b/Source/FilteringFunctions/arm_conv_q7.c @@ -264,9 +264,9 @@ void arm_conv_q7( acc3 = 0; /* read x[0], x[1], x[2] samples */ - x0 = *(px++); - x1 = *(px++); - x2 = *(px++); + x0 = *px++; + x1 = *px++; + x2 = *px++; /* Apply loop unrolling and compute 4 MACs simultaneously. */ k = srcBLen >> 2U; @@ -276,12 +276,12 @@ void arm_conv_q7( do { /* Read y[srcBLen - 1] sample */ - c0 = *(py--); + c0 = *py--; /* Read y[srcBLen - 2] sample */ - c1 = *(py--); + c1 = *py--; /* Read x[3] sample */ - x3 = *(px++); + x3 = *px++; /* x[0] and x[1] are packed */ in1 = (q15_t) x0; @@ -317,7 +317,7 @@ void arm_conv_q7( acc2 = __SMLAD(input1, input2, acc2); /* Read x[4] sample */ - x0 = *(px++); + x0 = *px++; /* x[3] and x[4] are packed */ in1 = (q15_t) x3; @@ -329,12 +329,12 @@ void arm_conv_q7( acc3 = __SMLAD(input1, input2, acc3); /* Read y[srcBLen - 3] sample */ - c0 = *(py--); + c0 = *py--; /* Read y[srcBLen - 4] sample */ - c1 = *(py--); + c1 = *py--; /* Read x[5] sample */ - x1 = *(px++); + x1 = *px++; /* x[2] and x[3] are packed */ in1 = (q15_t) x2; @@ -370,7 +370,7 @@ void arm_conv_q7( acc2 = __SMLAD(input1, input2, acc2); /* Read x[6] sample */ - x2 = *(px++); + x2 = *px++; /* x[5] and x[6] are packed */ in1 = (q15_t) x1; @@ -390,10 +390,10 @@ void arm_conv_q7( while (k > 0U) { /* Read y[srcBLen - 5] sample */ - c0 = *(py--); + c0 = *py--; /* Read x[7] sample */ - x3 = *(px++); + x3 = *px++; /* Perform the multiply-accumulates */ /* acc0 += x[4] * y[srcBLen - 5] */