Merged PR #166

7 years ago · da38c27dd0
parent 06eff37277
commit da38c27dd0
10 changed files with 124 additions and 236 deletions
--- a/Source/FilteringFunctions/arm_conv_f32.c
+++ b/Source/FilteringFunctions/arm_conv_f32.c
@ -294,9 +294,9 @@ void arm_conv_f32(
      acc3 = 0.0f;

      /* read x[0], x[1], x[2] samples */
-      x0 = *(px++);
-      x1 = *(px++);
-      x2 = *(px++);
+      x0 = *px++;
+      x1 = *px++;
+      x2 = *px++;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
      k = srcBLen >> 2U;
@ -306,7 +306,7 @@ void arm_conv_f32(
      do
      {
        /* Read y[srcBLen - 1] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[3] sample */
        x3 = *(px);
@ -325,7 +325,7 @@ void arm_conv_f32(
        acc3 += x3 * c0;

        /* Read y[srcBLen - 2] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[4] sample */
        x0 = *(px + 1U);
@ -341,7 +341,7 @@ void arm_conv_f32(
        acc3 += x0 * c0;

        /* Read y[srcBLen - 3] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[5] sample */
        x1 = *(px + 2U);
@ -357,7 +357,7 @@ void arm_conv_f32(
        acc3 += x1 * c0;

        /* Read y[srcBLen - 4] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[6] sample */
        x2 = *(px + 3U);
@ -383,10 +383,10 @@ void arm_conv_f32(
      while (k > 0U)
      {
        /* Read y[srcBLen - 5] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[7] sample */
-        x3 = *(px++);
+        x3 = *px++;

        /* Perform the multiply-accumulates */
        /* acc0 +=  x[4] * y[srcBLen - 5] */
--- a/Source/FilteringFunctions/arm_conv_fast_q31.c
+++ b/Source/FilteringFunctions/arm_conv_fast_q31.c
@ -248,9 +248,9 @@ void arm_conv_fast_q31(
      acc3 = 0;

      /* read x[0], x[1], x[2] samples */
-      x0 = *(px++);
-      x1 = *(px++);
-      x2 = *(px++);
+      x0 = *px++;
+      x1 = *px++;
+      x2 = *px++;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
      k = srcBLen >> 2U;
@ -260,10 +260,10 @@ void arm_conv_fast_q31(
      do
      {
        /* Read y[srcBLen - 1] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[3] sample */
-        x3 = *(px++);
+        x3 = *px++;

        /* Perform the multiply-accumulates */
        /* acc0 +=  x[0] * y[srcBLen - 1] */
@ -279,10 +279,10 @@ void arm_conv_fast_q31(
        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32);

        /* Read y[srcBLen - 2] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[4] sample */
-        x0 = *(px++);
+        x0 = *px++;

        /* Perform the multiply-accumulate */
        /* acc0 +=  x[1] * y[srcBLen - 2] */
@ -295,10 +295,10 @@ void arm_conv_fast_q31(
        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32);

        /* Read y[srcBLen - 3] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[5] sample */
-        x1 = *(px++);
+        x1 = *px++;

        /* Perform the multiply-accumulates */
        /* acc0 +=  x[2] * y[srcBLen - 3] */
@ -311,10 +311,10 @@ void arm_conv_fast_q31(
        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32);

        /* Read y[srcBLen - 4] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[6] sample */
-        x2 = *(px++);
+        x2 = *px++;

        /* Perform the multiply-accumulates */
        /* acc0 +=  x[3] * y[srcBLen - 4] */
@ -336,10 +336,10 @@ void arm_conv_fast_q31(
      while (k > 0U)
      {
        /* Read y[srcBLen - 5] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[7] sample */
-        x3 = *(px++);
+        x3 = *px++;

        /* Perform the multiply-accumulates */
        /* acc0 +=  x[4] * y[srcBLen - 5] */
--- a/Source/FilteringFunctions/arm_conv_partial_f32.c
+++ b/Source/FilteringFunctions/arm_conv_partial_f32.c
@ -263,12 +263,13 @@ arm_status arm_conv_partial_f32(
    /* Working pointer of inputA */
    if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
    {
-      px = pIn1 + firstIndex - srcBLen + 1;
+      pSrc1 = pIn1 + firstIndex - srcBLen + 1;
    }
    else
    {
-      px = pIn1;
+      pSrc1 = pIn1;
    }
+    px = pSrc1;

    /* Working pointer of inputB */
    pSrc2 = pIn2 + (srcBLen - 1U);
@ -298,9 +299,9 @@ arm_status arm_conv_partial_f32(
        acc3 = 0.0f;

        /* read x[0], x[1], x[2] samples */
-        x0 = *(px++);
-        x1 = *(px++);
-        x2 = *(px++);
+        x0 = *px++;
+        x1 = *px++;
+        x2 = *px++;

        /* Apply loop unrolling and compute 4 MACs simultaneously. */
        k = srcBLen >> 2U;
@ -310,10 +311,10 @@ arm_status arm_conv_partial_f32(
        do
        {
          /* Read y[srcBLen - 1] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[3] sample */
-          x3 = *(px++);
+          x3 = *px++;

          /* Perform the multiply-accumulate */
          /* acc0 +=  x[0] * y[srcBLen - 1] */
@ -329,10 +330,10 @@ arm_status arm_conv_partial_f32(
          acc3 += x3 * c0;

          /* Read y[srcBLen - 2] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[4] sample */
-          x0 = *(px++);
+          x0 = *px++;

          /* Perform the multiply-accumulate */
          /* acc0 +=  x[1] * y[srcBLen - 2] */
@ -345,10 +346,10 @@ arm_status arm_conv_partial_f32(
          acc3 += x0 * c0;

          /* Read y[srcBLen - 3] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[5] sample */
-          x1 = *(px++);
+          x1 = *px++;

          /* Perform the multiply-accumulates */
          /* acc0 +=  x[2] * y[srcBLen - 3] */
@ -361,10 +362,10 @@ arm_status arm_conv_partial_f32(
          acc3 += x1 * c0;

          /* Read y[srcBLen - 4] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[6] sample */
-          x2 = *(px++);
+          x2 = *px++;

          /* Perform the multiply-accumulates */
          /* acc0 +=  x[3] * y[srcBLen - 4] */
@ -386,10 +387,10 @@ arm_status arm_conv_partial_f32(
        while (k > 0U)
        {
          /* Read y[srcBLen - 5] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[7] sample */
-          x3 = *(px++);
+          x3 = *px++;

          /* Perform the multiply-accumulates */
          /* acc0 +=  x[4] * y[srcBLen - 5] */
@ -420,14 +421,7 @@ arm_status arm_conv_partial_f32(
        count += 4U;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -480,14 +474,7 @@ arm_status arm_conv_partial_f32(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -524,14 +511,7 @@ arm_status arm_conv_partial_f32(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
--- a/Source/FilteringFunctions/arm_conv_partial_fast_q15.c
+++ b/Source/FilteringFunctions/arm_conv_partial_fast_q15.c
@ -270,12 +270,13 @@ arm_status arm_conv_partial_fast_q15(
    /* Working pointer of inputA */
    if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
    {
-      px = pIn1 + firstIndex - srcBLen + 1;
+      pSrc1 = pIn1 + firstIndex - srcBLen + 1;
    }
    else
    {
-      px = pIn1;
+      pSrc1 = pIn1;
    }
+    px = pSrc1;

    /* Working pointer of inputB */
    pSrc2 = pIn2 + (srcBLen - 1U);
@ -473,7 +474,7 @@ arm_status arm_conv_partial_fast_q15(
        count += 4U;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        px = pIn1 + count;
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -526,14 +527,7 @@ arm_status arm_conv_partial_fast_q15(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -570,14 +564,7 @@ arm_status arm_conv_partial_fast_q15(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -929,12 +916,13 @@ arm_status arm_conv_partial_fast_q15(
    /* Working pointer of inputA */
    if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
    {
-      px = pIn1 + firstIndex - srcBLen + 1;
+      pSrc1 = pIn1 + firstIndex - srcBLen + 1;
    }
    else
    {
-      px = pIn1;
+      pSrc1 = pIn1;
    }
+    px = pSrc1;

    /* Working pointer of inputB */
    pSrc2 = pIn2 + (srcBLen - 1U);
@ -1255,7 +1243,7 @@ arm_status arm_conv_partial_fast_q15(
        count += 4U;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        px = pIn1 + count;
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -1308,7 +1296,7 @@ arm_status arm_conv_partial_fast_q15(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        px = pIn1 + count;
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -1345,7 +1333,7 @@ arm_status arm_conv_partial_fast_q15(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        px = pIn1 + count;
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
--- a/Source/FilteringFunctions/arm_conv_partial_fast_q31.c
+++ b/Source/FilteringFunctions/arm_conv_partial_fast_q31.c
@ -234,12 +234,13 @@ arm_status arm_conv_partial_fast_q31(
    /* Working pointer of inputA */
    if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
    {
-      px = pIn1 + firstIndex - srcBLen + 1;
+      pSrc1 = pIn1 + firstIndex - srcBLen + 1;
    }
    else
    {
-      px = pIn1;
+      pSrc1 = pIn1;
    }
+    px = pSrc1;

    /* Working pointer of inputB */
    pSrc2 = pIn2 + (srcBLen - 1U);
@ -269,9 +270,9 @@ arm_status arm_conv_partial_fast_q31(
        acc3 = 0;

        /* read x[0], x[1], x[2] samples */
-        x0 = *(px++);
-        x1 = *(px++);
-        x2 = *(px++);
+        x0 = *px++;
+        x1 = *px++;
+        x2 = *px++;

        /* Apply loop unrolling and compute 4 MACs simultaneously. */
        k = srcBLen >> 2U;
@ -281,10 +282,10 @@ arm_status arm_conv_partial_fast_q31(
        do
        {
          /* Read y[srcBLen - 1] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[3] sample */
-          x3 = *(px++);
+          x3 = *px++;

          /* Perform the multiply-accumulate */
          /* acc0 +=  x[0] * y[srcBLen - 1] */
@ -300,10 +301,10 @@ arm_status arm_conv_partial_fast_q31(
          acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32);

          /* Read y[srcBLen - 2] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[4] sample */
-          x0 = *(px++);
+          x0 = *px++;

          /* Perform the multiply-accumulate */
          /* acc0 +=  x[1] * y[srcBLen - 2] */
@ -316,10 +317,10 @@ arm_status arm_conv_partial_fast_q31(
          acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32);

          /* Read y[srcBLen - 3] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[5] sample */
-          x1 = *(px++);
+          x1 = *px++;

          /* Perform the multiply-accumulates */
          /* acc0 +=  x[2] * y[srcBLen - 3] */
@ -332,10 +333,10 @@ arm_status arm_conv_partial_fast_q31(
          acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32);

          /* Read y[srcBLen - 4] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[6] sample */
-          x2 = *(px++);
+          x2 = *px++;

          /* Perform the multiply-accumulates */
          /* acc0 +=  x[3] * y[srcBLen - 4] */
@ -357,10 +358,10 @@ arm_status arm_conv_partial_fast_q31(
        while (k > 0U)
        {
          /* Read y[srcBLen - 5] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[7] sample */
-          x3 = *(px++);
+          x3 = *px++;

          /* Perform the multiply-accumulates */
          /* acc0 +=  x[4] * y[srcBLen - 5] */
@ -391,14 +392,7 @@ arm_status arm_conv_partial_fast_q31(
        count += 4U;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -456,14 +450,7 @@ arm_status arm_conv_partial_fast_q31(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -501,14 +488,7 @@ arm_status arm_conv_partial_fast_q31(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
--- a/Source/FilteringFunctions/arm_conv_partial_q15.c
+++ b/Source/FilteringFunctions/arm_conv_partial_q15.c
@ -277,12 +277,13 @@ arm_status arm_conv_partial_q15(
    /* Working pointer of inputA */
    if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
    {
-      px = pIn1 + firstIndex - srcBLen + 1;
+      pSrc1 = pIn1 + firstIndex - srcBLen + 1;
    }
    else
    {
-      px = pIn1;
+      pSrc1 = pIn1;
    }
+    px = pSrc1;

    /* Working pointer of inputB */
    pSrc2 = pIn2 + (srcBLen - 1U);
@ -488,14 +489,7 @@ arm_status arm_conv_partial_q15(
      count += 4U;

      /* Update the inputA and inputB pointers for next MAC calculation */
-      if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-      {
-        px = pIn1 + firstIndex - srcBLen + 1 + count;
-      }
-      else
-      {
-        px = pIn1 + count;
-      }
+      px = pSrc1 + count;
      py = pSrc2;

        /* Decrement the loop counter */
@ -548,14 +542,7 @@ arm_status arm_conv_partial_q15(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -592,14 +579,7 @@ arm_status arm_conv_partial_q15(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
--- a/Source/FilteringFunctions/arm_conv_partial_q31.c
+++ b/Source/FilteringFunctions/arm_conv_partial_q31.c
@ -231,12 +231,13 @@ arm_status arm_conv_partial_q31(
    /* Working pointer of inputA */
    if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
    {
-      px = pIn1 + firstIndex - srcBLen + 1;
+      pSrc1 = pIn1 + firstIndex - srcBLen + 1;
    }
    else
    {
-      px = pIn1;
+      pSrc1 = pIn1;
    }
+    px = pSrc1;

    /* Working pointer of inputB */
    pSrc2 = pIn2 + (srcBLen - 1U);
@ -265,8 +266,8 @@ arm_status arm_conv_partial_q31(
        acc2 = 0;

        /* read x[0], x[1] samples */
-        x0 = *(px++);
-        x1 = *(px++);
+        x0 = *px++;
+        x1 = *px++;

        /* Apply loop unrolling and compute 3 MACs simultaneously. */
        k = srcBLen / 3;
@ -331,10 +332,10 @@ arm_status arm_conv_partial_q31(
        while (k > 0U)
        {
          /* Read y[srcBLen - 5] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[7] sample */
-          x2 = *(px++);
+          x2 = *px++;

          /* Perform the multiply-accumulates */
          /* acc0 +=  x[4] * y[srcBLen - 5] */
@ -361,14 +362,7 @@ arm_status arm_conv_partial_q31(
        count += 3U;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -421,14 +415,7 @@ arm_status arm_conv_partial_q31(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -465,14 +452,7 @@ arm_status arm_conv_partial_q31(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
--- a/Source/FilteringFunctions/arm_conv_partial_q7.c
+++ b/Source/FilteringFunctions/arm_conv_partial_q7.c
@ -254,12 +254,13 @@ arm_status arm_conv_partial_q7(
    /* Working pointer of inputA */
    if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
    {
-      px = pIn1 + firstIndex - srcBLen + 1;
+      pSrc1 = pIn1 + firstIndex - srcBLen + 1;
    }
    else
    {
-      px = pIn1;
+      pSrc1 = pIn1;
    }
+    px = pSrc1;

    /* Working pointer of inputB */
    pSrc2 = pIn2 + (srcBLen - 1U);
@ -289,9 +290,9 @@ arm_status arm_conv_partial_q7(
        acc3 = 0;

        /* read x[0], x[1], x[2] samples */
-        x0 = *(px++);
-        x1 = *(px++);
-        x2 = *(px++);
+        x0 = *px++;
+        x1 = *px++;
+        x2 = *px++;

        /* Apply loop unrolling and compute 4 MACs simultaneously. */
        k = srcBLen >> 2U;
@ -301,12 +302,12 @@ arm_status arm_conv_partial_q7(
        do
        {
          /* Read y[srcBLen - 1] sample */
-          c0 = *(py--);
+          c0 = *py--;
          /* Read y[srcBLen - 2] sample */
-          c1 = *(py--);
+          c1 = *py--;

          /* Read x[3] sample */
-          x3 = *(px++);
+          x3 = *px++;

          /* x[0] and x[1] are packed */
          in1 = (q15_t) x0;
@ -342,7 +343,7 @@ arm_status arm_conv_partial_q7(
          acc2 = __SMLAD(input1, input2, acc2);

          /* Read x[4] sample */
-          x0 = *(px++);
+          x0 = *px++;

          /* x[3] and x[4] are packed */
          in1 = (q15_t) x3;
@ -354,12 +355,12 @@ arm_status arm_conv_partial_q7(
          acc3 = __SMLAD(input1, input2, acc3);

          /* Read y[srcBLen - 3] sample */
-          c0 = *(py--);
+          c0 = *py--;
          /* Read y[srcBLen - 4] sample */
-          c1 = *(py--);
+          c1 = *py--;

          /* Read x[5] sample */
-          x1 = *(px++);
+          x1 = *px++;

          /* x[2] and x[3] are packed */
          in1 = (q15_t) x2;
@ -395,7 +396,7 @@ arm_status arm_conv_partial_q7(
          acc2 = __SMLAD(input1, input2, acc2);

          /* Read x[6] sample */
-          x2 = *(px++);
+          x2 = *px++;

          /* x[5] and x[6] are packed */
          in1 = (q15_t) x1;
@ -415,10 +416,10 @@ arm_status arm_conv_partial_q7(
        while (k > 0U)
        {
          /* Read y[srcBLen - 5] sample */
-          c0 = *(py--);
+          c0 = *py--;

          /* Read x[7] sample */
-          x3 = *(px++);
+          x3 = *px++;

          /* Perform the multiply-accumulates */
          /* acc0 +=  x[4] * y[srcBLen - 5] */
@ -449,14 +450,7 @@ arm_status arm_conv_partial_q7(
        count += 4U;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;


@ -531,14 +525,7 @@ arm_status arm_conv_partial_q7(
 	    count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
@ -575,14 +562,7 @@ arm_status arm_conv_partial_q7(
        count++;

        /* Update the inputA and inputB pointers for next MAC calculation */
-        if ((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
-        {
-          px = pIn1 + firstIndex - srcBLen + 1 + count;
-        }
-        else
-        {
-          px = pIn1 + count;
-        }
+        px = pSrc1 + count;
        py = pSrc2;

        /* Decrement the loop counter */
--- a/Source/FilteringFunctions/arm_conv_q31.c
+++ b/Source/FilteringFunctions/arm_conv_q31.c
@ -245,8 +245,8 @@ void arm_conv_q31(
      acc2 = 0;

      /* read x[0], x[1] samples */
-      x0 = *(px++);
-      x1 = *(px++);
+      x0 = *px++;
+      x1 = *px++;

      /* Apply loop unrolling and compute 3 MACs simultaneously. */
      k = srcBLen / 3;
@ -310,10 +310,10 @@ void arm_conv_q31(
      while (k > 0U)
      {
        /* Read y[srcBLen - 5] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[7] sample */
-        x2 = *(px++);
+        x2 = *px++;

        /* Perform the multiply-accumulates */
        /* acc0 +=  x[4] * y[srcBLen - 5] */
--- a/Source/FilteringFunctions/arm_conv_q7.c
+++ b/Source/FilteringFunctions/arm_conv_q7.c
@ -264,9 +264,9 @@ void arm_conv_q7(
      acc3 = 0;

      /* read x[0], x[1], x[2] samples */
-      x0 = *(px++);
-      x1 = *(px++);
-      x2 = *(px++);
+      x0 = *px++;
+      x1 = *px++;
+      x2 = *px++;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
      k = srcBLen >> 2U;
@ -276,12 +276,12 @@ void arm_conv_q7(
      do
      {
        /* Read y[srcBLen - 1] sample */
-        c0 = *(py--);
+        c0 = *py--;
        /* Read y[srcBLen - 2] sample */
-        c1 = *(py--);
+        c1 = *py--;

        /* Read x[3] sample */
-        x3 = *(px++);
+        x3 = *px++;

        /* x[0] and x[1] are packed */
        in1 = (q15_t) x0;
@ -317,7 +317,7 @@ void arm_conv_q7(
        acc2 = __SMLAD(input1, input2, acc2);

        /* Read x[4] sample */
-        x0 = *(px++);
+        x0 = *px++;

        /* x[3] and x[4] are packed */
        in1 = (q15_t) x3;
@ -329,12 +329,12 @@ void arm_conv_q7(
        acc3 = __SMLAD(input1, input2, acc3);

        /* Read y[srcBLen - 3] sample */
-        c0 = *(py--);
+        c0 = *py--;
        /* Read y[srcBLen - 4] sample */
-        c1 = *(py--);
+        c1 = *py--;

        /* Read x[5] sample */
-        x1 = *(px++);
+        x1 = *px++;

        /* x[2] and x[3] are packed */
        in1 = (q15_t) x2;
@ -370,7 +370,7 @@ void arm_conv_q7(
        acc2 = __SMLAD(input1, input2, acc2);

        /* Read x[6] sample */
-        x2 = *(px++);
+        x2 = *px++;

        /* x[5] and x[6] are packed */
        in1 = (q15_t) x1;
@ -390,10 +390,10 @@ void arm_conv_q7(
      while (k > 0U)
      {
        /* Read y[srcBLen - 5] sample */
-        c0 = *(py--);
+        c0 = *py--;

        /* Read x[7] sample */
-        x3 = *(px++);
+        x3 = *px++;

        /* Perform the multiply-accumulates */
        /* acc0 +=  x[4] * y[srcBLen - 5] */