Global MISRA-C Rule 10.6 fix-up: unsigned constant values now use an uppercase U suffix instead of a lowercase u. (Issue #227)

9 years ago · 2208df0e14
parent b665acba90
commit 2208df0e14
264 changed files with 4691 additions and 4691 deletions
--- a/DSP_Lib_TestSuite/Common/src/transform_tests/cfft_family_tests.c
+++ b/DSP_Lib_TestSuite/Common/src/transform_tests/cfft_family_tests.c
@ -151,8 +151,8 @@
    /*                         TYPE_FROM_ABBREV(q15),                  \ */
    /*                         ifft_flag) */

-CFFT_FAMILY_DEFINE_ALL_TESTS(forward, 0u);
-CFFT_FAMILY_DEFINE_ALL_TESTS(inverse, 1u);
+CFFT_FAMILY_DEFINE_ALL_TESTS(forward, 0U);
+CFFT_FAMILY_DEFINE_ALL_TESTS(inverse, 1U);

 /*--------------------------------------------------------------------------------*/
 /* Collect all tests in a group */
--- a/DSP_Lib_TestSuite/Common/src/transform_tests/rfft_fast_tests.c
+++ b/DSP_Lib_TestSuite/Common/src/transform_tests/rfft_fast_tests.c
@ -61,8 +61,8 @@ FFT fast function test template. Arguments are: function configuration suffix
        return JTEST_TEST_PASSED;                                       \
    }

-RFFT_FAST_DEFINE_TEST(forward, 0u);
-RFFT_FAST_DEFINE_TEST(inverse, 1u);
+RFFT_FAST_DEFINE_TEST(forward, 0U);
+RFFT_FAST_DEFINE_TEST(inverse, 1U);

 /*--------------------------------------------------------------------------------*/
 /* Collect all tests in a group */
--- a/DSP_Lib_TestSuite/Common/src/transform_tests/rfft_tests.c
+++ b/DSP_Lib_TestSuite/Common/src/transform_tests/rfft_tests.c
@ -26,11 +26,11 @@
            /* Initialize the RFFT and CFFT Instances */                \
            arm_rfft_init_##suffix(                                     \
                &rfft_inst_fut,                                         \
-                (uint32_t) fftlen, ifft_flag, 1u);                      \
+                (uint32_t) fftlen, ifft_flag, 1U);                      \
                                                                        \
            arm_rfft_init_##suffix(                                     \
                &rfft_inst_ref,                                         \
-                (uint32_t) fftlen, ifft_flag, 1u);                      \
+                (uint32_t) fftlen, ifft_flag, 1U);                      \
                                                                        \
            if (ifft_flag)                                               \
            {                                                           \
@ -74,11 +74,11 @@
            return JTEST_TEST_PASSED;                                   \
    }

-RFFT_DEFINE_TEST(q31, forward, 0u, TYPE_FROM_ABBREV(q31), TYPE_FROM_ABBREV(q31));
-RFFT_DEFINE_TEST(q15, forward, 0u, TYPE_FROM_ABBREV(q15), TYPE_FROM_ABBREV(q15));
-//RFFT_DEFINE_TEST(f32, inverse, 1u, TYPE_FROM_ABBREV(f32), TYPE_FROM_ABBREV(f32));
-RFFT_DEFINE_TEST(q31, inverse, 1u, TYPE_FROM_ABBREV(q31), TYPE_FROM_ABBREV(q31));
-RFFT_DEFINE_TEST(q15, inverse, 1u, TYPE_FROM_ABBREV(q15), TYPE_FROM_ABBREV(q15));
+RFFT_DEFINE_TEST(q31, forward, 0U, TYPE_FROM_ABBREV(q31), TYPE_FROM_ABBREV(q31));
+RFFT_DEFINE_TEST(q15, forward, 0U, TYPE_FROM_ABBREV(q15), TYPE_FROM_ABBREV(q15));
+//RFFT_DEFINE_TEST(f32, inverse, 1U, TYPE_FROM_ABBREV(f32), TYPE_FROM_ABBREV(f32));
+RFFT_DEFINE_TEST(q31, inverse, 1U, TYPE_FROM_ABBREV(q31), TYPE_FROM_ABBREV(q31));
+RFFT_DEFINE_TEST(q15, inverse, 1U, TYPE_FROM_ABBREV(q15), TYPE_FROM_ABBREV(q15));

 /*--------------------------------------------------------------------------------*/
 /* Collect all tests in a group */
--- a/DSP_Lib_TestSuite/RefLibs/src/ControllerFunctions/pid.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/ControllerFunctions/pid.c
@ -35,7 +35,7 @@ q31_t ref_pid_q31(
 	acc += (q63_t) S->A2 * S->state[1];

 	/* convert output to 1.31 format to add y[n-1] */
-	out = (q31_t) (acc >> 31u);
+	out = (q31_t) (acc >> 31U);

 	/* out += y[n-1] */
 	out += S->state[2];
--- a/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/biquad.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/biquad.c
@ -31,7 +31,7 @@ void ref_biquad_cascade_df2T_f32(

      sample = blockSize;

-      while (sample > 0u)
+      while (sample > 0U)
      {
         /* Read the input */
         Xn = *pIn++;
@ -66,7 +66,7 @@ void ref_biquad_cascade_df2T_f32(
      /* decrement the loop counter */
      stage--;

-   } while (stage > 0u);
+   } while (stage > 0U);
 }


@ -103,7 +103,7 @@ void ref_biquad_cascade_stereo_df2T_f32(

        sample = blockSize;

-        while (sample > 0u)
+        while (sample > 0U)
        {
            /* Read the input */
            Xn1a = *pIn++; //Channel a
@ -145,7 +145,7 @@ void ref_biquad_cascade_stereo_df2T_f32(
        /* decrement the loop counter */
        stage--;

-    } while (stage > 0u);
+    } while (stage > 0U);
 	
 }

@ -180,7 +180,7 @@ void ref_biquad_cascade_df2T_f64(

      sample = blockSize;

-      while (sample > 0u)
+      while (sample > 0U)
      {
         /* Read the input */
         Xn = *pIn++;
@ -215,7 +215,7 @@ void ref_biquad_cascade_df2T_f64(
      /* decrement the loop counter */
      stage--;

-   } while (stage > 0u);
+   } while (stage > 0U);
 }

 void ref_biquad_cascade_df1_f32(
@ -255,7 +255,7 @@ void ref_biquad_cascade_df1_f32(

    sample = blockSize;

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
@ -297,7 +297,7 @@ void ref_biquad_cascade_df1_f32(
    /* decrement the loop counter */
    stage--;

-  } while (stage > 0u);
+  } while (stage > 0U);
 }

 void ref_biquad_cas_df1_32x64_q31(
@ -318,8 +318,8 @@ void ref_biquad_cas_df1_32x64_q31(
  int32_t shift = (int32_t) S->postShift + 1;    /*  Shift to be applied to the output 	*/
  uint32_t sample, stage = S->numStages;         /*  loop counters                     	*/
  q31_t acc_l, acc_h;                            /*  temporary output               		*/
-  uint32_t uShift = ((uint32_t) S->postShift + 1u);
-  uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output 	*/
+  uint32_t uShift = ((uint32_t) S->postShift + 1U);
+  uint32_t lShift = 32U - uShift;                /*  Shift to be applied to the output 	*/

  do
  {
@ -338,7 +338,7 @@ void ref_biquad_cas_df1_32x64_q31(

    sample = blockSize;

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
@ -396,8 +396,8 @@ void ref_biquad_cascade_df1_q31(
  uint32_t blockSize)
 {	
  q63_t acc;                                     /*  accumulator                   */
-  uint32_t uShift = ((uint32_t) S->postShift + 1u);
-  uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
+  uint32_t uShift = ((uint32_t) S->postShift + 1U);
+  uint32_t lShift = 32U - uShift;                /*  Shift to be applied to the output */
  q31_t *pIn = pSrc;                             /*  input pointer initialization  */
  q31_t *pOut = pDst;                            /*  output pointer initialization */
  q31_t *pState = S->pState;                     /*  pState pointer initialization */
@ -428,7 +428,7 @@ void ref_biquad_cascade_df1_q31(

    sample = blockSize;

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
@ -518,7 +518,7 @@ void ref_biquad_cascade_df1_fast_q31(
 		
    sample = blockSize;

-   while (sample > 0u)
+   while (sample > 0U)
   {
      /* Read the input */
      Xn = *pIn++;
@ -597,7 +597,7 @@ void ref_biquad_cascade_df1_fast_q15(

    sample = blockSize;

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
@ -672,7 +672,7 @@ void ref_biquad_cascade_df1_q15(

    sample = blockSize;

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
--- a/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/conv.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/conv.c
@ -73,7 +73,7 @@ void ref_conv_q31(
    }

    /* Store the output in the destination buffer */
-    pDst[i] = (q31_t)(sum >> 31u);
+    pDst[i] = (q31_t)(sum >> 31U);
  }
 }

@ -106,7 +106,7 @@ void ref_conv_fast_q31(
    }

    /* Store the output in the destination buffer */
-    pDst[i] = (q31_t)(sum << 1u);
+    pDst[i] = (q31_t)(sum << 1U);
  }
 }

@ -166,7 +166,7 @@ void ref_conv_q15(
    }

    /* Store the output in the destination buffer */
-    pDst[i] = ref_sat_q15(sum >> 15u);
+    pDst[i] = ref_sat_q15(sum >> 15U);
  }
 }

@ -202,7 +202,7 @@ arm_status ref_conv_partial_fast_opt_q15(
    }

    /* Store the output in the destination buffer */
-    pDst[i] = ref_sat_q15(sum >> 15u);
+    pDst[i] = ref_sat_q15(sum >> 15U);
  }
 	
  return ARM_MATH_SUCCESS;
@ -236,7 +236,7 @@ void ref_conv_fast_q15(
    }

    /* Store the output in the destination buffer */
-    pDst[i] = sum >> 15u;
+    pDst[i] = sum >> 15U;
  }
 }

@ -270,7 +270,7 @@ void ref_conv_fast_opt_q15(
    }

    /* Store the output in the destination buffer */
-    pDst[i] = ref_sat_q15(sum >> 15u);
+    pDst[i] = ref_sat_q15(sum >> 15U);
  }
 }

--- a/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/correlate.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/correlate.c
@ -8,11 +8,11 @@ void ref_correlate_f32(
  float32_t * pDst)
 {
  float32_t *pIn1 = pSrcA;                       /* inputA pointer 			*/
-  float32_t *pIn2 = pSrcB + (srcBLen - 1u);      /* inputB pointer 			*/
+  float32_t *pIn2 = pSrcB + (srcBLen - 1U);      /* inputB pointer 			*/
  float32_t sum;                                 /* Accumulator 				*/
-  uint32_t i = 0u, j;                            /* loop counters 			*/
-  uint32_t inv = 0u;                             /* Reverse order flag 	*/
-  uint32_t tot = 0u;                             /* Length 							*/
+  uint32_t i = 0U, j;                            /* loop counters 			*/
+  uint32_t inv = 0U;                             /* Reverse order flag 	*/
+  uint32_t tot = 0U;                             /* Length 							*/

  /* The algorithm implementation is based on the lengths of the inputs. 
   * srcB is always made to slide across srcA. 
@ -32,7 +32,7 @@ void ref_correlate_f32(
 	 */

  /* Calculate the length of the remaining sequence */
-  tot = srcALen + srcBLen - 2u;
+  tot = srcALen + srcBLen - 2U;

  if (srcALen > srcBLen)
  {
@ -46,7 +46,7 @@ void ref_correlate_f32(
    pIn1 = pSrcB;

    /* Initialization to the end of inputA pointer */
-    pIn2 = pSrcA + srcALen - 1u;
+    pIn2 = pSrcA + srcALen - 1U;

    /* Initialisation of the pointer after zero padding */
    pDst += tot;
@ -61,13 +61,13 @@ void ref_correlate_f32(
  }

  /* Loop to calculate convolution for output length number of times */
-  for (i = 0u; i <= tot; i++)
+  for (i = 0U; i <= tot; i++)
  {
    /* Initialize sum with zero to carry on MAC operations */
    sum = 0.0f;

    /* Loop to perform MAC operations according to convolution equation */
-    for (j = 0u; j <= i; j++)
+    for (j = 0U; j <= i; j++)
    {
      /* Check the array limitations */
      if ((i - j < srcBLen) && (j < srcALen))
@ -92,14 +92,14 @@ void ref_correlate_q31(
  q31_t * pDst)
 {
  q31_t *pIn1 = pSrcA;                           /* inputA pointer               */
-  q31_t *pIn2 = pSrcB + (srcBLen - 1u);          /* inputB pointer               */
+  q31_t *pIn2 = pSrcB + (srcBLen - 1U);          /* inputB pointer               */
  q63_t sum;                                     /* Accumulators                  */
-  uint32_t i = 0u, j;                            /* loop counters */
-  uint32_t inv = 0u;                             /* Reverse order flag */
-  uint32_t tot = 0u;                             /* Length */
+  uint32_t i = 0U, j;                            /* loop counters */
+  uint32_t inv = 0U;                             /* Reverse order flag */
+  uint32_t tot = 0U;                             /* Length */

  /* Calculate the length of the remaining sequence */
-  tot = ((srcALen + srcBLen) - 2u);
+  tot = ((srcALen + srcBLen) - 2U);

  if (srcALen > srcBLen)
  {
@ -116,7 +116,7 @@ void ref_correlate_q31(
    pIn1 = pSrcB;

    /* Initialization to the end of inputA pointer */
-    pIn2 = pSrcA + (srcALen - 1u);
+    pIn2 = pSrcA + (srcALen - 1U);

    /* Initialisation of the pointer after zero padding */
    pDst = pDst + tot;
@ -132,13 +132,13 @@ void ref_correlate_q31(
  }

  /* Loop to calculate correlation for output length number of times */
-  for (i = 0u; i <= tot; i++)
+  for (i = 0U; i <= tot; i++)
  {
    /* Initialize sum with zero to carry on MAC operations */
    sum = 0;

    /* Loop to perform MAC operations according to correlation equation */
-    for (j = 0u; j <= i; j++)
+    for (j = 0U; j <= i; j++)
    {
      /* Check the array limitations */
      if ((((i - j) < srcBLen) && (j < srcALen)))
@ -149,9 +149,9 @@ void ref_correlate_q31(
    }
    /* Store the output in the destination buffer */
    if (inv == 1)
-      *pDst-- = (q31_t)(sum >> 31u);
+      *pDst-- = (q31_t)(sum >> 31U);
    else
-      *pDst++ = (q31_t)(sum >> 31u);
+      *pDst++ = (q31_t)(sum >> 31U);
  }
 }

@ -163,14 +163,14 @@ void ref_correlate_fast_q31(
  q31_t * pDst)
 {
  q31_t *pIn1 = pSrcA;                           /* inputA pointer               */
-  q31_t *pIn2 = pSrcB + (srcBLen - 1u);          /* inputB pointer               */
+  q31_t *pIn2 = pSrcB + (srcBLen - 1U);          /* inputB pointer               */
  q63_t sum;                                     /* Accumulators                  */
-  uint32_t i = 0u, j;                            /* loop counters */
-  uint32_t inv = 0u;                             /* Reverse order flag */
-  uint32_t tot = 0u;                             /* Length */
+  uint32_t i = 0U, j;                            /* loop counters */
+  uint32_t inv = 0U;                             /* Reverse order flag */
+  uint32_t tot = 0U;                             /* Length */

  /* Calculate the length of the remaining sequence */
-  tot = ((srcALen + srcBLen) - 2u);
+  tot = ((srcALen + srcBLen) - 2U);

  if (srcALen > srcBLen)
  {
@ -187,7 +187,7 @@ void ref_correlate_fast_q31(
    pIn1 = pSrcB;

    /* Initialization to the end of inputA pointer */
-    pIn2 = pSrcA + (srcALen - 1u);
+    pIn2 = pSrcA + (srcALen - 1U);

    /* Initialisation of the pointer after zero padding */
    pDst = pDst + tot;
@ -203,13 +203,13 @@ void ref_correlate_fast_q31(
  }

  /* Loop to calculate correlation for output length number of times */
-  for (i = 0u; i <= tot; i++)
+  for (i = 0U; i <= tot; i++)
  {
    /* Initialize sum with zero to carry on MAC operations */
    sum = 0;

    /* Loop to perform MAC operations according to correlation equation */
-    for (j = 0u; j <= i; j++)
+    for (j = 0U; j <= i; j++)
    {
      /* Check the array limitations */
      if ((((i - j) < srcBLen) && (j < srcALen)))
@ -221,9 +221,9 @@ void ref_correlate_fast_q31(
    }
    /* Store the output in the destination buffer */
    if (inv == 1)
-      *pDst-- = (q31_t)(sum << 1u);
+      *pDst-- = (q31_t)(sum << 1U);
    else
-      *pDst++ = (q31_t)(sum << 1u);
+      *pDst++ = (q31_t)(sum << 1U);
  }          
 }

@ -235,14 +235,14 @@ void ref_correlate_q15(
  q15_t * pDst)
 {
  q15_t *pIn1 = pSrcA;                           /* inputA pointer               */
-  q15_t *pIn2 = pSrcB + (srcBLen - 1u);          /* inputB pointer               */
+  q15_t *pIn2 = pSrcB + (srcBLen - 1U);          /* inputB pointer               */
  q63_t sum;                                     /* Accumulators                  */
-  uint32_t i = 0u, j;                            /* loop counters */
-  uint32_t inv = 0u;                             /* Reverse order flag */
-  uint32_t tot = 0u;                             /* Length */
+  uint32_t i = 0U, j;                            /* loop counters */
+  uint32_t inv = 0U;                             /* Reverse order flag */
+  uint32_t tot = 0U;                             /* Length */

  /* Calculate the length of the remaining sequence */
-  tot = ((srcALen + srcBLen) - 2u);
+  tot = ((srcALen + srcBLen) - 2U);

  if (srcALen > srcBLen)
  {
@ -259,7 +259,7 @@ void ref_correlate_q15(
    pIn1 = pSrcB;

    /* Initialization to the end of inputA pointer */
-    pIn2 = pSrcA + (srcALen - 1u);
+    pIn2 = pSrcA + (srcALen - 1U);

    /* Initialisation of the pointer after zero padding */
    pDst = pDst + tot;
@ -275,13 +275,13 @@ void ref_correlate_q15(
  }

  /* Loop to calculate convolution for output length number of times */
-  for (i = 0u; i <= tot; i++)
+  for (i = 0U; i <= tot; i++)
  {
    /* Initialize sum with zero to carry on MAC operations */
    sum = 0;

    /* Loop to perform MAC operations according to convolution equation */
-    for (j = 0u; j <= i; j++)
+    for (j = 0U; j <= i; j++)
    {
      /* Check the array limitations */
      if ((((i - j) < srcBLen) && (j < srcALen)))
@ -292,9 +292,9 @@ void ref_correlate_q15(
    }
    /* Store the output in the destination buffer */
    if (inv == 1)
-      *pDst-- = (q15_t) ref_sat_q15(sum >> 15u);
+      *pDst-- = (q15_t) ref_sat_q15(sum >> 15U);
    else
-      *pDst++ = (q15_t) ref_sat_q15(sum >> 15u);
+      *pDst++ = (q15_t) ref_sat_q15(sum >> 15U);
  }
 }

@ -306,14 +306,14 @@ void ref_correlate_fast_q15(
  q15_t * pDst)
 {
  q15_t *pIn1 = pSrcA;                           /* inputA pointer               */
-  q15_t *pIn2 = pSrcB + (srcBLen - 1u);          /* inputB pointer               */
+  q15_t *pIn2 = pSrcB + (srcBLen - 1U);          /* inputB pointer               */
  q63_t sum;                                     /* Accumulators                  */
-  uint32_t i = 0u, j;                            /* loop counters */
-  uint32_t inv = 0u;                             /* Reverse order flag */
-  uint32_t tot = 0u;                             /* Length */
+  uint32_t i = 0U, j;                            /* loop counters */
+  uint32_t inv = 0U;                             /* Reverse order flag */
+  uint32_t tot = 0U;                             /* Length */

  /* Calculate the length of the remaining sequence */
-  tot = ((srcALen + srcBLen) - 2u);
+  tot = ((srcALen + srcBLen) - 2U);

  if (srcALen > srcBLen)
  {
@ -330,7 +330,7 @@ void ref_correlate_fast_q15(
    pIn1 = pSrcB;

    /* Initialization to the end of inputA pointer */
-    pIn2 = pSrcA + (srcALen - 1u);
+    pIn2 = pSrcA + (srcALen - 1U);

    /* Initialisation of the pointer after zero padding */
    pDst = pDst + tot;
@ -346,13 +346,13 @@ void ref_correlate_fast_q15(
  }

  /* Loop to calculate convolution for output length number of times */
-  for (i = 0u; i <= tot; i++)
+  for (i = 0U; i <= tot; i++)
  {
    /* Initialize sum with zero to carry on MAC operations */
    sum = 0;

    /* Loop to perform MAC operations according to convolution equation */
-    for (j = 0u; j <= i; j++)
+    for (j = 0U; j <= i; j++)
    {
      /* Check the array limitations */
      if ((((i - j) < srcBLen) && (j < srcALen)))
@ -363,9 +363,9 @@ void ref_correlate_fast_q15(
    }
    /* Store the output in the destination buffer */
    if (inv == 1)
-      *pDst-- = (q15_t)(sum >> 15u);
+      *pDst-- = (q15_t)(sum >> 15U);
    else
-      *pDst++ = (q15_t)(sum >> 15u);
+      *pDst++ = (q15_t)(sum >> 15U);
  }
 }

@ -378,14 +378,14 @@ void ref_correlate_fast_opt_q15(
  q15_t * pScratch)
 {
  q15_t *pIn1 = pSrcA;                           /* inputA pointer               */
-  q15_t *pIn2 = pSrcB + (srcBLen - 1u);          /* inputB pointer               */
+  q15_t *pIn2 = pSrcB + (srcBLen - 1U);          /* inputB pointer               */
  q31_t sum;                                     /* Accumulators                  */
-  uint32_t i = 0u, j;                            /* loop counters */
-  uint32_t inv = 0u;                             /* Reverse order flag */
-  uint32_t tot = 0u;                             /* Length */
+  uint32_t i = 0U, j;                            /* loop counters */
+  uint32_t inv = 0U;                             /* Reverse order flag */
+  uint32_t tot = 0U;                             /* Length */

  /* Calculate the length of the remaining sequence */
-  tot = ((srcALen + srcBLen) - 2u);
+  tot = ((srcALen + srcBLen) - 2U);

  if (srcALen > srcBLen)
  {
@ -402,7 +402,7 @@ void ref_correlate_fast_opt_q15(
    pIn1 = pSrcB;

    /* Initialization to the end of inputA pointer */
-    pIn2 = pSrcA + (srcALen - 1u);
+    pIn2 = pSrcA + (srcALen - 1U);

    /* Initialisation of the pointer after zero padding */
    pDst = pDst + tot;
@ -418,13 +418,13 @@ void ref_correlate_fast_opt_q15(
  }

  /* Loop to calculate convolution for output length number of times */
-  for (i = 0u; i <= tot; i++)
+  for (i = 0U; i <= tot; i++)
  {
    /* Initialize sum with zero to carry on MAC operations */
    sum = 0;

    /* Loop to perform MAC operations according to convolution equation */
-    for (j = 0u; j <= i; j++)
+    for (j = 0U; j <= i; j++)
    {
      /* Check the array limitations */
      if ((((i - j) < srcBLen) && (j < srcALen)))
@ -435,9 +435,9 @@ void ref_correlate_fast_opt_q15(
    }
    /* Store the output in the destination buffer */
    if (inv == 1)
-      *pDst-- = (q15_t) ref_sat_q15(sum >> 15u);
+      *pDst-- = (q15_t) ref_sat_q15(sum >> 15U);
    else
-      *pDst++ = (q15_t) ref_sat_q15(sum >> 15u);
+      *pDst++ = (q15_t) ref_sat_q15(sum >> 15U);
  }
 }

@ -449,14 +449,14 @@ void ref_correlate_q7(
  q7_t * pDst)
 {
  q7_t *pIn1 = pSrcA;                            /* inputA pointer */
-  q7_t *pIn2 = pSrcB + (srcBLen - 1u);           /* inputB pointer */
+  q7_t *pIn2 = pSrcB + (srcBLen - 1U);           /* inputB pointer */
  q31_t sum;                                     /* Accumulator */
-  uint32_t i = 0u, j;                            /* loop counters */
-  uint32_t inv = 0u;                             /* Reverse order flag */
-  uint32_t tot = 0u;                             /* Length */
+  uint32_t i = 0U, j;                            /* loop counters */
+  uint32_t inv = 0U;                             /* Reverse order flag */
+  uint32_t tot = 0U;                             /* Length */

  /* Calculate the length of the remaining sequence */
-  tot = ((srcALen + srcBLen) - 2u);
+  tot = ((srcALen + srcBLen) - 2U);

  if (srcALen > srcBLen)
  {
@ -473,7 +473,7 @@ void ref_correlate_q7(
    pIn1 = pSrcB;

    /* Initialization to the end of inputA pointer */
-    pIn2 = pSrcA + (srcALen - 1u);
+    pIn2 = pSrcA + (srcALen - 1U);

    /* Initialisation of the pointer after zero padding */
    pDst = pDst + tot;
@ -489,13 +489,13 @@ void ref_correlate_q7(
  }

  /* Loop to calculate convolution for output length number of times */
-  for (i = 0u; i <= tot; i++)
+  for (i = 0U; i <= tot; i++)
  {
    /* Initialize sum with zero to carry on MAC operations */
    sum = 0;

    /* Loop to perform MAC operations according to convolution equation */
-    for (j = 0u; j <= i; j++)
+    for (j = 0U; j <= i; j++)
    {
      /* Check the array limitations */
      if ((((i - j) < srcBLen) && (j < srcALen)))
@ -506,8 +506,8 @@ void ref_correlate_q7(
    }
    /* Store the output in the destination buffer */
    if (inv == 1)
-      *pDst-- = (q7_t) __SSAT((sum >> 7u), 8u);
+      *pDst-- = (q7_t) __SSAT((sum >> 7U), 8U);
    else
-      *pDst++ = (q7_t) __SSAT((sum >> 7u), 8u);
+      *pDst++ = (q7_t) __SSAT((sum >> 7U), 8U);
  }
 }
--- a/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir.c
@ -15,9 +15,9 @@ void ref_fir_f32(

   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
   /* pStateCurnt points to the location where the new input data should be written */
-   pStateCurnt = &(S->pState[(numTaps - 1u)]);
+   pStateCurnt = &(S->pState[(numTaps - 1U)]);

-   while (blockSize > 0u)
+   while (blockSize > 0U)
   {
      /* Copy one sample at a time into state buffer */
      *pStateCurnt++ = *pSrc++;
@ -69,9 +69,9 @@ void ref_fir_q31(

   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
   /* pStateCurnt points to the location where the new input data should be written */
-   pStateCurnt = &(S->pState[(numTaps - 1u)]);
+   pStateCurnt = &(S->pState[(numTaps - 1U)]);

-   while (blockSize > 0u)
+   while (blockSize > 0U)
   {
      /* Copy one sample at a time into state buffer */
      *pStateCurnt++ = *pSrc++;
@ -123,9 +123,9 @@ void ref_fir_fast_q31(

   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
   /* pStateCurnt points to the location where the new input data should be written */
-   pStateCurnt = &(S->pState[(numTaps - 1u)]);
+   pStateCurnt = &(S->pState[(numTaps - 1U)]);

-   while (blockSize > 0u)
+   while (blockSize > 0U)
   {
      /* Copy one sample at a time into state buffer */
      *pStateCurnt++ = *pSrc++;
@ -177,9 +177,9 @@ void ref_fir_q15(

   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
   /* pStateCurnt points to the location where the new input data should be written */
-   pStateCurnt = &(S->pState[(numTaps - 1u)]);
+   pStateCurnt = &(S->pState[(numTaps - 1U)]);

-   while (blockSize > 0u)
+   while (blockSize > 0U)
   {
      /* Copy one sample at a time into state buffer */
      *pStateCurnt++ = *pSrc++;
@ -231,9 +231,9 @@ void ref_fir_fast_q15(

   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
   /* pStateCurnt points to the location where the new input data should be written */
-   pStateCurnt = &(S->pState[(numTaps - 1u)]);
+   pStateCurnt = &(S->pState[(numTaps - 1U)]);

-   while (blockSize > 0u)
+   while (blockSize > 0U)
   {
      /* Copy one sample at a time into state buffer */
      *pStateCurnt++ = *pSrc++;
@ -285,9 +285,9 @@ void ref_fir_q7(

   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
   /* pStateCurnt points to the location where the new input data should be written */
-   pStateCurnt = &(S->pState[(numTaps - 1u)]);
+   pStateCurnt = &(S->pState[(numTaps - 1U)]);

-   while (blockSize > 0u)
+   while (blockSize > 0U)
   {
      /* Copy one sample at a time into state buffer */
      *pStateCurnt++ = *pSrc++;
--- a/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_decimate.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_decimate.c
@ -16,12 +16,12 @@ void ref_fir_decimate_f32(

  /* S->pState buffer contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = S->pState + numTaps - 1u;
+  pStateCurnt = S->pState + numTaps - 1U;

  /* Total number of output samples to be computed */
  blkCnt = blockSize / S->M;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Copy decimation factor number of new input samples into the state buffer */
    i = S->M;
@ -64,10 +64,10 @@ void ref_fir_decimate_f32(
  pStateCurnt = S->pState;

  /* Copy numTaps number of values */
-  i = numTaps - 1u;
+  i = numTaps - 1U;

  /* copy data */
-  while (i > 0u)
+  while (i > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -92,12 +92,12 @@ void ref_fir_decimate_q31(

  /* S->pState buffer contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = S->pState + numTaps - 1u;
+  pStateCurnt = S->pState + numTaps - 1U;

  /* Total number of output samples to be computed */
  blkCnt = blockSize / S->M;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Copy decimation factor number of new input samples into the state buffer */
    i = S->M;
@ -141,10 +141,10 @@ void ref_fir_decimate_q31(
  /* Points to the start of the state buffer */
  pStateCurnt = S->pState;

-  i = numTaps - 1u;
+  i = numTaps - 1U;

  /* copy data */
-  while (i > 0u)
+  while (i > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -169,12 +169,12 @@ void ref_fir_decimate_fast_q31(

  /* S->pState buffer contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = S->pState + numTaps - 1u;
+  pStateCurnt = S->pState + numTaps - 1U;

  /* Total number of output samples to be computed */
  blkCnt = blockSize / S->M;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Copy decimation factor number of new input samples into the state buffer */
    i = S->M;
@ -218,10 +218,10 @@ void ref_fir_decimate_fast_q31(
  /* Points to the start of the state buffer */
  pStateCurnt = S->pState;

-  i = numTaps - 1u;
+  i = numTaps - 1U;

  /* copy data */
-  while (i > 0u)
+  while (i > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -246,12 +246,12 @@ void ref_fir_decimate_q15(

  /* S->pState buffer contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = S->pState + numTaps - 1u;
+  pStateCurnt = S->pState + numTaps - 1U;

  /* Total number of output samples to be computed */
  blkCnt = blockSize / S->M;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Copy decimation factor number of new input samples into the state buffer */
    i = S->M;
@ -295,10 +295,10 @@ void ref_fir_decimate_q15(
  /* Points to the start of the state buffer */
  pStateCurnt = S->pState;

-  i = numTaps - 1u;
+  i = numTaps - 1U;

  /* copy data */
-  while (i > 0u)
+  while (i > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -323,12 +323,12 @@ void ref_fir_decimate_fast_q15(

  /* S->pState buffer contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = S->pState + numTaps - 1u;
+  pStateCurnt = S->pState + numTaps - 1U;

  /* Total number of output samples to be computed */
  blkCnt = blockSize / S->M;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Copy decimation factor number of new input samples into the state buffer */
    i = S->M;
@ -372,10 +372,10 @@ void ref_fir_decimate_fast_q15(
  /* Points to the start of the state buffer */
  pStateCurnt = S->pState;

-  i = numTaps - 1u;
+  i = numTaps - 1U;

  /* copy data */
-  while (i > 0u)
+  while (i > 0U)
  {
    *pStateCurnt++ = *pState++;

--- a/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_interpolate.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_interpolate.c
@ -23,7 +23,7 @@ void ref_fir_interpolate_f32(
  blkCnt = blockSize;

  /* Loop over the blockSize. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Copy new input sample into the state buffer */
    *pStateCurnt++ = *pSrc++;
@ -31,7 +31,7 @@ void ref_fir_interpolate_f32(
    /* Loop over the Interpolation factor. */
    i = S->L;

-    while (i > 0u)
+    while (i > 0U)
    {
      /* Set accumulator to zero */
      sum = 0.0f;
@ -45,7 +45,7 @@ void ref_fir_interpolate_f32(
      /* Loop over the polyPhase length */
      tapCnt = phaseLen;

-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {
        /* Perform the multiply-accumulate */
        sum += *ptr1++ * *ptr2;
@ -79,9 +79,9 @@ void ref_fir_interpolate_f32(
  /* Points to the start of the state buffer */
  pStateCurnt = S->pState;

-  tapCnt = phaseLen - 1u;
+  tapCnt = phaseLen - 1U;

-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -118,7 +118,7 @@ void ref_fir_interpolate_q31(
  blkCnt = blockSize;

  /* Loop over the blockSize. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Copy new input sample into the state buffer */
    *pStateCurnt++ = *pSrc++;
@ -126,7 +126,7 @@ void ref_fir_interpolate_q31(
    /* Loop over the Interpolation factor. */
    i = S->L;

-    while (i > 0u)
+    while (i > 0U)
    {
      /* Set accumulator to zero */
      sum = 0;
@ -139,7 +139,7 @@ void ref_fir_interpolate_q31(

      tapCnt = phaseLen;

-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {
        /* Read the coefficient */
        c0 = *(ptr2);
@ -179,10 +179,10 @@ void ref_fir_interpolate_q31(
  /* Points to the start of the state buffer */
  pStateCurnt = S->pState;

-  tapCnt = phaseLen - 1u;
+  tapCnt = phaseLen - 1U;

  /* copy data */
-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -216,7 +216,7 @@ void ref_fir_interpolate_q15(
  blkCnt = blockSize;

  /* Loop over the blockSize. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Copy new input sample into the state buffer */
    *pStateCurnt++ = *pSrc++;
@ -224,7 +224,7 @@ void ref_fir_interpolate_q15(
    /* Loop over the Interpolation factor. */
    i = S->L;

-    while (i > 0u)
+    while (i > 0U)
    {
      /* Set accumulator to zero */
      sum = 0;
@ -238,7 +238,7 @@ void ref_fir_interpolate_q15(
      /* Loop over the polyPhase length */
      tapCnt = (uint32_t)phaseLen;

-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {
        /* Read the coefficient */
        c0 = *ptr2;
@ -278,9 +278,9 @@ void ref_fir_interpolate_q15(
  /* Points to the start of the state buffer */
  pStateCurnt = S->pState;

-  i = (uint32_t) phaseLen - 1u;
+  i = (uint32_t) phaseLen - 1U;

-  while (i > 0u)
+  while (i > 0U)
  {
    *pStateCurnt++ = *pState++;

--- a/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_lattice.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_lattice.c
@ -18,7 +18,7 @@ void ref_fir_lattice_f32(

  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* f0(n) = x(n) */
    fcurr = *pSrc++;
@ -45,10 +45,10 @@ void ref_fir_lattice_f32(
       for next stage processing */
    fcurr = fnext;

-    stageCnt = (numStages - 1u);
+    stageCnt = (numStages - 1U);

    /* stage loop */
-    while (stageCnt > 0u)
+    while (stageCnt > 0U)
    {
      /* read g2(n) from state buffer */
      gcurr = *px;
@ -94,7 +94,7 @@ void ref_fir_lattice_q31(

  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* f0(n) = x(n) */
    fcurr = *pSrc++;
@ -120,10 +120,10 @@ void ref_fir_lattice_q31(
       for next stage processing */
    fcurr = fnext;

-    stageCnt = (numStages - 1u);
+    stageCnt = (numStages - 1U);

    /* stage loop */
-    while (stageCnt > 0u)
+    while (stageCnt > 0U)
    {
      /* read g2(n) from state buffer */
      gcurr = *px;
@ -171,7 +171,7 @@ void ref_fir_lattice_q15(

  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* f0(n) = x(n) */
    fcurnt = *pSrc++;
@ -187,12 +187,12 @@ void ref_fir_lattice_q15(

    /* for sample 1 processing */
    /* f1(n) = f0(n) +  K1 * g0(n-1) */
-    fnext = ((gcurnt * (*pk)) >> 15u) + fcurnt;
+    fnext = ((gcurnt * (*pk)) >> 15U) + fcurnt;
    fnext = ref_sat_q15(fnext);


    /* g1(n) = f0(n) * K1  +  g0(n-1) */
-    gnext = ((fcurnt * (*pk++)) >> 15u) + gcurnt;
+    gnext = ((fcurnt * (*pk++)) >> 15U) + gcurnt;
    gnext = ref_sat_q15(gnext);

    /* save f0(n) in state buffer */
@ -202,10 +202,10 @@ void ref_fir_lattice_q15(
       for next stage processing */
    fcurnt = fnext;

-    stageCnt = (numStages - 1u);
+    stageCnt = (numStages - 1U);

    /* stage loop */
-    while (stageCnt > 0u)
+    while (stageCnt > 0U)
    {
      /* read g1(n-1) from state buffer */
      gcurnt = *px;
@ -215,11 +215,11 @@ void ref_fir_lattice_q15(

      /* Sample processing for K2, K3.... */
      /* f2(n) = f1(n) +  K2 * g1(n-1) */
-      fnext = ((gcurnt * (*pk)) >> 15u) + fcurnt;
+      fnext = ((gcurnt * (*pk)) >> 15U) + fcurnt;
      fnext = ref_sat_q15(fnext);

      /* g2(n) = f1(n) * K2  +  g1(n-1) */
-      gnext = ((fcurnt * (*pk++)) >> 15u) + gcurnt;
+      gnext = ((fcurnt * (*pk++)) >> 15U) + gcurnt;
      gnext = ref_sat_q15(gnext);


--- a/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_sparse.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_sparse.c
@ -52,7 +52,7 @@ void ref_fir_sparse_f32(

  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Perform Multiplications and store in destination buffer */
    *pOut++ = *px++ * coeff;
@ -62,9 +62,9 @@ void ref_fir_sparse_f32(
  }

  /* Loop over the number of taps. */
-  tapCnt = (uint32_t) numTaps - 1u;
+  tapCnt = (uint32_t) numTaps - 1U;

-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    /* Load the coefficient value and
     * increment the coefficient buffer for the next set of state values */
@ -95,7 +95,7 @@ void ref_fir_sparse_f32(

    blkCnt = blockSize;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Perform Multiply-Accumulate */
      *pOut++ += *px++ * coeff;
@ -162,7 +162,7 @@ void ref_fir_sparse_q31(
  
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Perform Multiplications and store in the destination buffer */
    *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32);
@ -172,9 +172,9 @@ void ref_fir_sparse_q31(
  }

  /* Loop over the number of taps. */
-  tapCnt = (uint32_t) numTaps - 1u;
+  tapCnt = (uint32_t) numTaps - 1U;

-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    /* Load the coefficient value and           
     * increment the coefficient buffer for the next set of state values */
@ -205,7 +205,7 @@ void ref_fir_sparse_q31(

    blkCnt = blockSize;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Perform Multiply-Accumulate */
      out = *pOut;
@ -226,7 +226,7 @@ void ref_fir_sparse_q31(
  /* Output is converted into 1.31 format. */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    in = *pOut << 1;
    *pOut++ = in;
@ -290,7 +290,7 @@ void ref_fir_sparse_q15(

  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Perform multiplication and store in the scratch buffer */
    *pScratchOut++ = ((q31_t) * px++ * coeff);
@ -300,9 +300,9 @@ void ref_fir_sparse_q15(
  }

  /* Loop over the number of taps. */
-  tapCnt = (uint32_t) numTaps - 1u;
+  tapCnt = (uint32_t) numTaps - 1U;

-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    /* Load the coefficient value and           
     * increment the coefficient buffer for the next set of state values */
@ -332,7 +332,7 @@ void ref_fir_sparse_q15(

    blkCnt = blockSize;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Perform Multiply-Accumulate */
      *pScratchOut++ += (q31_t) * px++ * coeff;
@ -350,7 +350,7 @@ void ref_fir_sparse_q15(
  /* Loop over the blockSize. */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    *pOut++ = (q15_t) __SSAT(*pScr2++ >> 15, 16);
    blkCnt--;
@ -413,7 +413,7 @@ void ref_fir_sparse_q7(
  /* Loop over the blockSize */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Perform multiplication and store in the scratch buffer */
    *pScratchOut++ = ((q31_t) * px++ * coeff);
@ -423,9 +423,9 @@ void ref_fir_sparse_q7(
  }

  /* Loop over the number of taps. */
-  tapCnt = (uint32_t) numTaps - 1u;
+  tapCnt = (uint32_t) numTaps - 1U;

-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    /* Load the coefficient value and           
     * increment the coefficient buffer for the next set of state values */
@ -456,7 +456,7 @@ void ref_fir_sparse_q7(
    /* Loop over the blockSize */
    blkCnt = blockSize;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Perform Multiply-Accumulate */
      in = *pScratchOut + ((q31_t) * px++ * coeff);
@ -475,7 +475,7 @@ void ref_fir_sparse_q7(
  /* Loop over the blockSize. */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    *pOut++ = (q7_t) __SSAT(*pScr2++ >> 7, 8);

--- a/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/iir_lattice.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/iir_lattice.c
@ -18,7 +18,7 @@ void ref_iir_lattice_f32(
  pState = &S->pState[0];

  /* Sample processing */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Read Sample from input buffer */
    /* fN(n) = x(n) */
@ -38,7 +38,7 @@ void ref_iir_lattice_f32(
    /* Process sample for numStages */
    tapCnt = numStages;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      gcurr = *px1++;
      /* Process sample for last taps */
@ -63,7 +63,7 @@ void ref_iir_lattice_f32(
    *pDst++ = acc;

    /* Advance the state pointer by 1 to process the next group of samples */
-    pState = pState + 1u;
+    pState = pState + 1U;
    blkCnt--;
  }

@ -77,7 +77,7 @@ void ref_iir_lattice_f32(
  tapCnt = numStages;

  /* Copy the data */
-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -104,7 +104,7 @@ void ref_iir_lattice_q31(
  pState = &S->pState[0];

  /* Sample processing */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Read Sample from input buffer */
    /* fN(n) = x(n) */
@ -123,7 +123,7 @@ void ref_iir_lattice_q31(

    tapCnt = numStages;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      gcurr = *px1++;
      /* Process sample */
@ -152,10 +152,10 @@ void ref_iir_lattice_q31(
    *px2++ = fnext;

    /* write out into pDst */
-    *pDst++ = (q31_t) (acc >> 31u);
+    *pDst++ = (q31_t) (acc >> 31U);

    /* Advance the state pointer by 1 to process the next group of samples */
-    pState = pState + 1u;
+    pState = pState + 1U;
    blkCnt--;
  }

@ -169,7 +169,7 @@ void ref_iir_lattice_q31(
  tapCnt = numStages;

  /* Copy the remaining q31_t data */
-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -198,7 +198,7 @@ void ref_iir_lattice_q15(
  pState = &S->pState[0];

  /* Sample processing */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Read Sample from input buffer */
    /* fN(n) = x(n) */
@ -217,7 +217,7 @@ void ref_iir_lattice_q15(

    tapCnt = numStages;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      gcurr = *px1++;
      /* Process sample */
@ -248,7 +248,7 @@ void ref_iir_lattice_q15(
    *pDst++ = out;

    /* Advance the state pointer by 1 to process the next group of samples */
-    pState = pState + 1u;
+    pState = pState + 1U;
    blkCnt--;
  }

@ -261,7 +261,7 @@ void ref_iir_lattice_q15(
  stgCnt = numStages;

  /* copy data */
-  while (stgCnt > 0u)
+  while (stgCnt > 0U)
  {
    *pStateCurnt++ = *pState++;

--- a/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/lms.c
+++ b/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/lms.c
@ -22,11 +22,11 @@ void ref_lms_f32(

  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = &(S->pState[numTaps - 1u]);
+  pStateCurnt = &(S->pState[numTaps - 1U]);

  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Copy the new input sample into the state buffer */
    *pStateCurnt++ = *pSrc++;
@ -99,9 +99,9 @@ void ref_lms_norm_f32(

  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = &(S->pState[numTaps - 1u]);
+  pStateCurnt = &(S->pState[numTaps - 1U]);

-  for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
+  for(blkCnt = blockSize; blkCnt > 0U; blkCnt--)
  {
    /* Copy the new input sample into the state buffer */
    *pStateCurnt++ = *pSrc;
@ -179,13 +179,13 @@ void ref_lms_q31(
  q31_t coef;                                    /* Temporary variable for coef */
  q31_t acc_l, acc_h;                            /*  temporary input */
  uint32_t uShift = (uint32_t)S->postShift + 1;
-  uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
+  uint32_t lShift = 32U - uShift;                /*  Shift to be applied to the output */

  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = &(S->pState[(numTaps - 1u)]);
+  pStateCurnt = &(S->pState[(numTaps - 1U)]);

-  for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
+  for(blkCnt = blockSize; blkCnt > 0U; blkCnt--)
  {
    /* Copy the new input sample into the state buffer */
    *pStateCurnt++ = *pSrc++;
@ -202,7 +202,7 @@ void ref_lms_q31(
    /* Loop over numTaps number of values */
    tapCnt = numTaps;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      /* Perform the multiply-accumulate */
      acc += (q63_t)(*px++) * (*pb++);
@ -241,7 +241,7 @@ void ref_lms_q31(
    /* Loop over numTaps number of values */
    tapCnt = numTaps;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      /* Perform the multiply-accumulate */
      coef = (q31_t)(((q63_t) alpha * (*px++)) >> 32);
@ -260,11 +260,11 @@ void ref_lms_q31(
  /* Points to the start of the pState buffer */
  pStateCurnt = S->pState;

-  /*  Copy (numTaps - 1u) samples  */
+  /*  Copy (numTaps - 1U) samples  */
  tapCnt = numTaps - 1;

  /* Copy the data */
-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -296,17 +296,17 @@ void ref_lms_norm_q31(
  q63_t errorXmu;                   				 /* Temporary variables to store error and mu product and reciprocal of energy */
  q31_t coef;                                    /* Temporary variable for coef */
  q31_t acc_l, acc_h;                            /*  temporary input */
-  uint32_t uShift = ((uint32_t) S->postShift + 1u);
-  uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
+  uint32_t uShift = ((uint32_t) S->postShift + 1U);
+  uint32_t lShift = 32U - uShift;                /*  Shift to be applied to the output */

  energy = S->energy;
  x0 = S->x0;

  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = &(S->pState[(numTaps - 1u)]);
+  pStateCurnt = &(S->pState[(numTaps - 1U)]);

-  for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
+  for(blkCnt = blockSize; blkCnt > 0U; blkCnt--)
  {

    /* Copy the new input sample into the state buffer */
@ -331,7 +331,7 @@ void ref_lms_norm_q31(
    /* Loop over numTaps number of values */
    tapCnt = numTaps;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      /* Perform the multiply-accumulate */
      acc += ((q63_t) (*px++)) * (*pb++);
@ -372,13 +372,13 @@ void ref_lms_norm_q31(
    /* Loop over numTaps number of values */
    tapCnt = numTaps;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      /* Perform the multiply-accumulate */
      /* coef is in 2.30 format */
      coef = (q31_t)(((q63_t)w * (*px++)) >> 32);
      /* get coef in 1.31 format by left shifting */
-      *pb = ref_sat_q31((q63_t)*pb + (coef << 1u));
+      *pb = ref_sat_q31((q63_t)*pb + (coef << 1U));
      /* update coefficient buffer to next coefficient */
      pb++;

@ -404,11 +404,11 @@ void ref_lms_norm_q31(
  /* Points to the start of the pState buffer */
  pStateCurnt = S->pState;

-  /* Loop for (numTaps - 1u) samples copy */
+  /* Loop for (numTaps - 1U) samples copy */
  tapCnt = numTaps - 1;

  /* Copy the remaining q31_t data */
-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -443,9 +443,9 @@ void ref_lms_q15(

  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = &(S->pState[(numTaps - 1u)]);
+  pStateCurnt = &(S->pState[(numTaps - 1U)]);

-  for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
+  for(blkCnt = blockSize; blkCnt > 0U; blkCnt--)
  {
    /* Copy the new input sample into the state buffer */
    *pStateCurnt++ = *pSrc++;
@ -462,7 +462,7 @@ void ref_lms_q15(
    /* Loop over numTaps number of values */
    tapCnt = numTaps;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      /* Perform the multiply-accumulate */
      acc += (q63_t)((q31_t)(*px++) * (*pb++));
@ -504,7 +504,7 @@ void ref_lms_q15(
    /* Loop over numTaps number of values */
    tapCnt = numTaps;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      /* Perform the multiply-accumulate */
      coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
@ -522,11 +522,11 @@ void ref_lms_q15(
  /* Points to the start of the pState buffer */
  pStateCurnt = S->pState;

-  /*  Copy (numTaps - 1u) samples  */
+  /*  Copy (numTaps - 1U) samples  */
  tapCnt = numTaps - 1;

  /* Copy the data */
-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    *pStateCurnt++ = *pState++;

@ -568,9 +568,9 @@ void ref_lms_norm_q15(

  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
  /* pStateCurnt points to the location where the new input data should be written */
-  pStateCurnt = &(S->pState[(numTaps - 1u)]);
+  pStateCurnt = &(S->pState[(numTaps - 1U)]);

-  for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
+  for(blkCnt = blockSize; blkCnt > 0U; blkCnt--)
  {
    /* Copy the new input sample into the state buffer */
    *pStateCurnt++ = *pSrc;
@ -594,7 +594,7 @@ void ref_lms_norm_q15(
    /* Loop over numTaps number of values */
    tapCnt = numTaps;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      /* Perform the multiply-accumulate */
      acc += (q31_t)*px++ * (*pb++);
@ -653,7 +653,7 @@ void ref_lms_norm_q15(
    /* Loop over numTaps number of values */
    tapCnt = numTaps;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      /* Perform the multiply-accumulate */
      coef = *pb + (((q31_t)w * (*px++)) >> 15);
@ -667,7 +667,7 @@ void ref_lms_norm_q15(
    x0 = *pState;

    /* Advance state pointer by 1 for the next sample */
-    pState = pState + 1u;
+    pState = pState + 1U;
  }

  /* Save energy and x0 values for the next frame */
@ -681,11 +681,11 @@ void ref_lms_norm_q15(
  /* Points to the start of the pState buffer */
  pStateCurnt = S->pState;

-  /* copy (numTaps - 1u) data */
+  /* copy (numTaps - 1U) data */
  tapCnt = numTaps - 1;

  /* copy data */
-  while (tapCnt > 0u)
+  while (tapCnt > 0U)
  {
    *pStateCurnt++ = *pState++;

--- a/Include/arm_math.h
+++ b/Include/arm_math.h
@ -590,8 +590,8 @@ extern "C"

    while ((data & mask) == 0)
    {
-      count += 1u;
-      mask = mask >> 1u;
+      count += 1U;
+      mask = mask >> 1U;
    }

    return (count);
@ -633,7 +633,7 @@ extern "C"

    /* calculation of reciprocal value */
    /* running approximation for two iterations */
-    for (i = 0u; i < 2u; i++)
+    for (i = 0U; i < 2U; i++)
    {
      tempVal = (uint32_t) (((q63_t) in * out) >> 31);
      tempVal = 0x7FFFFFFFu - tempVal;
@ -646,7 +646,7 @@ extern "C"
    *dst = out;

    /* return num of signbits of out = 1/in value */
-    return (signBits + 1u);
+    return (signBits + 1U);
  }


@ -684,7 +684,7 @@ extern "C"

    /* calculation of reciprocal value */
    /* running approximation for two iterations */
-    for (i = 0u; i < 2u; i++)
+    for (i = 0U; i < 2U; i++)
    {
      tempVal = (uint32_t) (((q31_t) in * out) >> 15);
      tempVal = 0x7FFFu - tempVal;
@ -4924,7 +4924,7 @@ void arm_rfft_fast_f32(
    acc += (q63_t) S->A2 * S->state[1];

    /* convert output to 1.31 format to add y[n-1] */
-    out = (q31_t) (acc >> 31u);
+    out = (q31_t) (acc >> 31U);

    /* out += y[n-1] */
    out += S->state[2];
@ -5608,7 +5608,7 @@ void arm_rfft_fast_f32(
      y += ((q31_t) (((q63_t) y1 * fract) >> 32));

      /* Convert y to 1.31 format */
-      return (y << 1u);
+      return (y << 1U);
    }
  }

@ -5892,7 +5892,7 @@ void arm_rfft_fast_f32(
  int32_t srcInc,
  uint32_t blockSize)
  {
-    uint32_t i = 0u;
+    uint32_t i = 0U;
    int32_t wOffset;

    /* Copy the value of Index pointer that points
@ -5902,7 +5902,7 @@ void arm_rfft_fast_f32(
    /* Loop over the blockSize */
    i = blockSize;

-    while (i > 0u)
+    while (i > 0U)
    {
      /* copy the input sample to the circular buffer */
      circBuffer[wOffset] = *src;
@ -5939,7 +5939,7 @@ void arm_rfft_fast_f32(
  int32_t dstInc,
  uint32_t blockSize)
  {
-    uint32_t i = 0u;
+    uint32_t i = 0U;
    int32_t rOffset, dst_end;

    /* Copy the value of Index pointer that points
@ -5950,7 +5950,7 @@ void arm_rfft_fast_f32(
    /* Loop over the blockSize */
    i = blockSize;

-    while (i > 0u)
+    while (i > 0U)
    {
      /* copy the sample from the circular buffer to the destination buffer */
      *dst = circBuffer[rOffset];
@ -5992,7 +5992,7 @@ void arm_rfft_fast_f32(
  int32_t srcInc,
  uint32_t blockSize)
  {
-    uint32_t i = 0u;
+    uint32_t i = 0U;
    int32_t wOffset;

    /* Copy the value of Index pointer that points
@ -6002,7 +6002,7 @@ void arm_rfft_fast_f32(
    /* Loop over the blockSize */
    i = blockSize;

-    while (i > 0u)
+    while (i > 0U)
    {
      /* copy the input sample to the circular buffer */
      circBuffer[wOffset] = *src;
@ -6050,7 +6050,7 @@ void arm_rfft_fast_f32(
    /* Loop over the blockSize */
    i = blockSize;

-    while (i > 0u)
+    while (i > 0U)
    {
      /* copy the sample from the circular buffer to the destination buffer */
      *dst = circBuffer[rOffset];
@ -6092,7 +6092,7 @@ void arm_rfft_fast_f32(
  int32_t srcInc,
  uint32_t blockSize)
  {
-    uint32_t i = 0u;
+    uint32_t i = 0U;
    int32_t wOffset;

    /* Copy the value of Index pointer that points
@ -6102,7 +6102,7 @@ void arm_rfft_fast_f32(
    /* Loop over the blockSize */
    i = blockSize;

-    while (i > 0u)
+    while (i > 0U)
    {
      /* copy the input sample to the circular buffer */
      circBuffer[wOffset] = *src;
@ -6150,7 +6150,7 @@ void arm_rfft_fast_f32(
    /* Loop over the blockSize */
    i = blockSize;

-    while (i > 0u)
+    while (i > 0U)
    {
      /* copy the sample from the circular buffer to the destination buffer */
      *dst = circBuffer[rOffset];
@ -6928,7 +6928,7 @@ void arm_rfft_fast_f32(

    /* 20 bits for the fractional part */
    /* shift left xfract by 11 to keep 1.31 format */
-    xfract = (X & 0x000FFFFF) << 11u;
+    xfract = (X & 0x000FFFFF) << 11U;

    /* Read two nearest output values from the index */
    x1 = pYData[(rI) + (int32_t)nCols * (cI)    ];
@ -6936,7 +6936,7 @@ void arm_rfft_fast_f32(

    /* 20 bits for the fractional part */
    /* shift left yfract by 11 to keep 1.31 format */
-    yfract = (Y & 0x000FFFFF) << 11u;
+    yfract = (Y & 0x000FFFFF) << 11U;

    /* Read two nearest output values from the index */
    y1 = pYData[(rI) + (int32_t)nCols * (cI + 1)    ];
@ -7020,19 +7020,19 @@ void arm_rfft_fast_f32(

    /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
    /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
-    out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
+    out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4U);
    acc = ((q63_t) out * (0xFFFFF - yfract));

    /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
-    out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
+    out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4U);
    acc += ((q63_t) out * (xfract));

    /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
-    out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
+    out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4U);
    acc += ((q63_t) out * (yfract));

    /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
-    out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
+    out = (q31_t) (((q63_t) y2 * (xfract)) >> 4U);
    acc += ((q63_t) out * (yfract));

    /* acc is in 13.51 format and down shift acc by 36 times */
--- a/Source/BasicMathFunctions/arm_abs_f32.c
+++ b/Source/BasicMathFunctions/arm_abs_f32.c
@ -73,11 +73,11 @@ void arm_abs_f32(
  float32_t in1, in2, in3, in4;                  /* temporary variables */

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = |A| */
    /* Calculate absolute and then store the results in the destination buffer. */
@ -115,10 +115,10 @@ void arm_abs_f32(


    /* Update source pointer to process next sampels */
-    pSrc += 4u;
+    pSrc += 4U;

    /* Update destination pointer to process next sampels */
-    pDst += 4u;
+    pDst += 4U;

    /* Decrement the loop counter */
    blkCnt--;
@ -126,7 +126,7 @@ void arm_abs_f32(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -137,7 +137,7 @@ void arm_abs_f32(

 #endif /*   #if defined (ARM_MATH_DSP)   */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = |A| */
    /* Calculate absolute and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_abs_q15.c
+++ b/Source/BasicMathFunctions/arm_abs_q15.c
@ -67,12 +67,12 @@ void arm_abs_q15(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
  simd = __SIMD32_CONST(pDst);
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = |A| */
    /* Read two inputs */
@ -121,9 +121,9 @@ void arm_abs_q15(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = |A| */
    /* Read the input */
@ -145,7 +145,7 @@ void arm_abs_q15(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = |A| */
    /* Read the input */
--- a/Source/BasicMathFunctions/arm_abs_q31.c
+++ b/Source/BasicMathFunctions/arm_abs_q31.c
@ -65,11 +65,11 @@ void arm_abs_q31(
  q31_t in1, in2, in3, in4;

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = |A| */
    /* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
@ -89,7 +89,7 @@ void arm_abs_q31(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -100,7 +100,7 @@ void arm_abs_q31(

 #endif /*   #if defined (ARM_MATH_DSP)   */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = |A| */
    /* Calculate absolute value of the input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_abs_q7.c
+++ b/Source/BasicMathFunctions/arm_abs_q7.c
@ -69,11 +69,11 @@ void arm_abs_q7(
  q31_t out1, out2, out3, out4;                  /* temporary output variables */

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = |A| */
    /* Read inputs */
@ -109,8 +109,8 @@ void arm_abs_q7(
    *(pDst + 3) = (q7_t) out4;

    /* update pointers to process next samples */
-    pSrc += 4u;
-    pDst += 4u;
+    pSrc += 4U;
+    pDst += 4U;

    /* Decrement the loop counter */
    blkCnt--;
@ -118,7 +118,7 @@ void arm_abs_q7(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;
 #else

  /* Run the below code for Cortex-M0 */
@ -126,7 +126,7 @@ void arm_abs_q7(

 #endif /* #define ARM_MATH_CM0_FAMILY */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = |A| */
    /* Read the input */
--- a/Source/BasicMathFunctions/arm_add_f32.c
+++ b/Source/BasicMathFunctions/arm_add_f32.c
@ -73,11 +73,11 @@ void arm_add_f32(
  float32_t inB1, inB2, inB3, inB4;              /* temporary input variables */

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
@ -100,9 +100,9 @@ void arm_add_f32(
    *(pDst + 3) = inA4 + inB4;

    /* update pointers to process next samples */
-    pSrcA += 4u;
-    pSrcB += 4u;
-    pDst += 4u;
+    pSrcA += 4U;
+    pSrcB += 4U;
+    pDst += 4U;


    /* Decrement the loop counter */
@ -111,7 +111,7 @@ void arm_add_f32(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -122,7 +122,7 @@ void arm_add_f32(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_add_q15.c
+++ b/Source/BasicMathFunctions/arm_add_q15.c
@ -65,11 +65,11 @@ void arm_add_q15(
  q31_t inA1, inA2, inB1, inB2;

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
@ -87,9 +87,9 @@ void arm_add_q15(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
@ -108,7 +108,7 @@ void arm_add_q15(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_add_q31.c
+++ b/Source/BasicMathFunctions/arm_add_q31.c
@ -67,11 +67,11 @@ void arm_add_q31(
  q31_t inB1, inB2, inB3, inB4;

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
@ -96,9 +96,9 @@ void arm_add_q31(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
@ -117,7 +117,7 @@ void arm_add_q31(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_add_q7.c
+++ b/Source/BasicMathFunctions/arm_add_q7.c
@ -65,11 +65,11 @@ void arm_add_q7(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
@ -81,9 +81,9 @@ void arm_add_q7(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
@ -102,7 +102,7 @@ void arm_add_q7(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + B */
    /* Add and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_dot_prod_f32.c
+++ b/Source/BasicMathFunctions/arm_dot_prod_f32.c
@ -74,11 +74,11 @@ void arm_dot_prod_f32(

 /* Run the below code for Cortex-M4 and Cortex-M3 */
  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Calculate dot product and then store the result in a temporary buffer */
@ -93,7 +93,7 @@ void arm_dot_prod_f32(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -105,7 +105,7 @@ void arm_dot_prod_f32(
 #endif /* #if defined (ARM_MATH_DSP) */


-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Calculate dot product and then store the result in a temporary buffer. */
--- a/Source/BasicMathFunctions/arm_dot_prod_q15.c
+++ b/Source/BasicMathFunctions/arm_dot_prod_q15.c
@ -69,11 +69,11 @@ void arm_dot_prod_q15(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Calculate dot product and then store the result in a temporary buffer. */
@ -86,9 +86,9 @@ void arm_dot_prod_q15(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Calculate dot product and then store the results in a temporary buffer. */
@ -106,7 +106,7 @@ void arm_dot_prod_q15(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Calculate dot product and then store the results in a temporary buffer. */
--- a/Source/BasicMathFunctions/arm_dot_prod_q31.c
+++ b/Source/BasicMathFunctions/arm_dot_prod_q31.c
@ -72,11 +72,11 @@ void arm_dot_prod_q31(
  q31_t inB1, inB2, inB3, inB4;

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Calculate dot product and then store the result in a temporary buffer. */
@ -89,10 +89,10 @@ void arm_dot_prod_q31(
    inB3 = *pSrcB++;
    inB4 = *pSrcB++;

-    sum += ((q63_t) inA1 * inB1) >> 14u;
-    sum += ((q63_t) inA2 * inB2) >> 14u;
-    sum += ((q63_t) inA3 * inB3) >> 14u;
-    sum += ((q63_t) inA4 * inB4) >> 14u;
+    sum += ((q63_t) inA1 * inB1) >> 14U;
+    sum += ((q63_t) inA2 * inB2) >> 14U;
+    sum += ((q63_t) inA3 * inB3) >> 14U;
+    sum += ((q63_t) inA4 * inB4) >> 14U;

    /* Decrement the loop counter */
    blkCnt--;
@ -100,7 +100,7 @@ void arm_dot_prod_q31(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -112,11 +112,11 @@ void arm_dot_prod_q31(
 #endif /* #if defined (ARM_MATH_DSP) */


-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Calculate dot product and then store the result in a temporary buffer. */
-    sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;
+    sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14U;

    /* Decrement the loop counter */
    blkCnt--;
--- a/Source/BasicMathFunctions/arm_dot_prod_q7.c
+++ b/Source/BasicMathFunctions/arm_dot_prod_q7.c
@ -74,11 +74,11 @@ void arm_dot_prod_q7(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* read 4 samples at a time from sourceA */
    input1 = *__SIMD32(pSrcA)++;
@ -104,9 +104,9 @@ void arm_dot_prod_q7(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Dot product and then store the results in a temporary buffer. */
@ -125,7 +125,7 @@ void arm_dot_prod_q7(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
    /* Dot product and then store the results in a temporary buffer. */
--- a/Source/BasicMathFunctions/arm_mult_f32.c
+++ b/Source/BasicMathFunctions/arm_mult_f32.c
@ -73,11 +73,11 @@ void arm_mult_f32(
  float32_t out1, out2, out3, out4;              /* temporary output variables */

  /* loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * B */
    /* Multiply the inputs and store the results in output buffer */
@ -125,9 +125,9 @@ void arm_mult_f32(


    /* update pointers to process next samples */
-    pSrcA += 4u;
-    pSrcB += 4u;
-    pDst += 4u;
+    pSrcA += 4U;
+    pSrcB += 4U;
+    pDst += 4U;

    /* Decrement the blockSize loop counter */
    blkCnt--;
@ -135,7 +135,7 @@ void arm_mult_f32(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -146,7 +146,7 @@ void arm_mult_f32(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * B */
    /* Multiply the inputs and store the results in output buffer */
--- a/Source/BasicMathFunctions/arm_mult_q15.c
+++ b/Source/BasicMathFunctions/arm_mult_q15.c
@ -68,11 +68,11 @@ void arm_mult_q15(
  q31_t mul1, mul2, mul3, mul4;                  /* temporary variables */

  /* loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* read two samples at a time from sourceA */
    inA1 = *__SIMD32(pSrcA)++;
@ -114,7 +114,7 @@ void arm_mult_q15(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -126,7 +126,7 @@ void arm_mult_q15(
 #endif /* #if defined (ARM_MATH_DSP) */


-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * B */
    /* Multiply the inputs and store the result in the destination buffer */
--- a/Source/BasicMathFunctions/arm_mult_q31.c
+++ b/Source/BasicMathFunctions/arm_mult_q31.c
@ -67,11 +67,11 @@ void arm_mult_q31(
  q31_t out1, out2, out3, out4;                  /* temporary output variables */

  /* loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * B */
    /* Multiply the inputs and then store the results in the destination buffer. */
@ -94,10 +94,10 @@ void arm_mult_q31(
    out3 = __SSAT(out3, 31);
    out4 = __SSAT(out4, 31);

-    *pDst++ = out1 << 1u;
-    *pDst++ = out2 << 1u;
-    *pDst++ = out3 << 1u;
-    *pDst++ = out4 << 1u;
+    *pDst++ = out1 << 1U;
+    *pDst++ = out2 << 1U;
+    *pDst++ = out3 << 1U;
+    *pDst++ = out4 << 1U;

    /* Decrement the blockSize loop counter */
    blkCnt--;
@ -105,9 +105,9 @@ void arm_mult_q31(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * B */
    /* Multiply the inputs and then store the results in the destination buffer. */
@ -115,7 +115,7 @@ void arm_mult_q31(
    inB1 = *pSrcB++;
    out1 = ((q63_t) inA1 * inB1) >> 32;
    out1 = __SSAT(out1, 31);
-    *pDst++ = out1 << 1u;
+    *pDst++ = out1 << 1U;

    /* Decrement the blockSize loop counter */
    blkCnt--;
@ -129,7 +129,7 @@ void arm_mult_q31(
  blkCnt = blockSize;


-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * B */
    /* Multiply the inputs and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_mult_q7.c
+++ b/Source/BasicMathFunctions/arm_mult_q7.c
@ -65,11 +65,11 @@ void arm_mult_q7(
  q7_t out1, out2, out3, out4;                   /* Temporary variables to store the product */

  /* loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * B */
    /* Multiply the inputs and store the results in temporary variables */
@ -87,7 +87,7 @@ void arm_mult_q7(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -99,7 +99,7 @@ void arm_mult_q7(
 #endif /* #if defined (ARM_MATH_DSP) */


-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * B */
    /* Multiply the inputs and store the result in the destination buffer */
--- a/Source/BasicMathFunctions/arm_negate_f32.c
+++ b/Source/BasicMathFunctions/arm_negate_f32.c
@ -73,11 +73,11 @@ void arm_negate_f32(
  float32_t in1, in2, in3, in4;                  /* temporary variables */

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* read inputs from source */
    in1 = *pSrc;
@ -98,8 +98,8 @@ void arm_negate_f32(
    *(pDst + 3) = in4;

    /* update pointers to process next samples */
-    pSrc += 4u;
-    pDst += 4u;
+    pSrc += 4U;
+    pDst += 4U;

    /* Decrement the loop counter */
    blkCnt--;
@ -107,7 +107,7 @@ void arm_negate_f32(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -118,7 +118,7 @@ void arm_negate_f32(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = -A */
    /* Negate and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_negate_q15.c
+++ b/Source/BasicMathFunctions/arm_negate_q15.c
@ -70,11 +70,11 @@ void arm_negate_q15(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = -A */
    /* Read two inputs at a time */
@ -94,8 +94,8 @@ void arm_negate_q15(


    /* update pointers to process next samples */
-    pSrc += 4u;
-    pDst += 4u;
+    pSrc += 4U;
+    pDst += 4U;

    /* Decrement the loop counter */
    blkCnt--;
@ -103,7 +103,7 @@ void arm_negate_q15(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -114,7 +114,7 @@ void arm_negate_q15(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = -A */
    /* Negate and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_negate_q31.c
+++ b/Source/BasicMathFunctions/arm_negate_q31.c
@ -64,11 +64,11 @@ void arm_negate_q31(
  q31_t in1, in2, in3, in4;

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = -A */
    /* Negate and then store the results in the destination buffer. */
@ -88,7 +88,7 @@ void arm_negate_q31(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -100,7 +100,7 @@ void arm_negate_q31(
 #endif /* #if defined (ARM_MATH_DSP) */


-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = -A */
    /* Negate and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_negate_q7.c
+++ b/Source/BasicMathFunctions/arm_negate_q7.c
@ -66,11 +66,11 @@ void arm_negate_q7(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = -A */
    /* Read four inputs */
@ -85,7 +85,7 @@ void arm_negate_q7(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -96,7 +96,7 @@ void arm_negate_q7(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = -A */
    /* Negate and then store the results in the destination buffer. */ \
--- a/Source/BasicMathFunctions/arm_offset_f32.c
+++ b/Source/BasicMathFunctions/arm_offset_f32.c
@ -75,11 +75,11 @@ void arm_offset_f32(
  float32_t in1, in2, in3, in4;

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the results in the destination buffer. */
@ -118,8 +118,8 @@ void arm_offset_f32(
    *(pDst + 3) = in4;

    /* update pointers to process next samples */
-    pSrc += 4u;
-    pDst += 4u;
+    pSrc += 4U;
+    pDst += 4U;

    /* Decrement the loop counter */
    blkCnt--;
@ -127,7 +127,7 @@ void arm_offset_f32(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -138,7 +138,7 @@ void arm_offset_f32(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_offset_q15.c
+++ b/Source/BasicMathFunctions/arm_offset_q15.c
@ -66,14 +66,14 @@ void arm_offset_q15(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* Offset is packed to 32 bit in order to use SIMD32 for addition */
  offset_packed = __PKHBT(offset, offset, 16);

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the results in the destination buffer, 2 samples at a time. */
@ -86,9 +86,9 @@ void arm_offset_q15(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the results in the destination buffer. */
@ -105,7 +105,7 @@ void arm_offset_q15(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_offset_q31.c
+++ b/Source/BasicMathFunctions/arm_offset_q31.c
@ -66,11 +66,11 @@ void arm_offset_q31(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the results in the destination buffer. */
@ -90,9 +90,9 @@ void arm_offset_q31(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the result in the destination buffer. */
@ -109,7 +109,7 @@ void arm_offset_q31(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_offset_q7.c
+++ b/Source/BasicMathFunctions/arm_offset_q7.c
@ -66,14 +66,14 @@ void arm_offset_q7(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* Offset is packed to 32 bit in order to use SIMD32 for addition */
  offset_packed = __PACKq7(offset, offset, offset, offset);

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the results in the destination bufferfor 4 samples at a time. */
@ -85,9 +85,9 @@ void arm_offset_q7(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the result in the destination buffer. */
@ -104,7 +104,7 @@ void arm_offset_q7(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A + offset */
    /* Add offset and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_scale_f32.c
+++ b/Source/BasicMathFunctions/arm_scale_f32.c
@ -87,11 +87,11 @@ void arm_scale_f32(
  float32_t in1, in2, in3, in4;                  /* temporary variabels */

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * scale */
    /* Scale the input and then store the results in the destination buffer. */
@ -121,8 +121,8 @@ void arm_scale_f32(
    *(pDst + 3) = in4;

    /* update pointers to process next samples */
-    pSrc += 4u;
-    pDst += 4u;
+    pSrc += 4U;
+    pDst += 4U;

    /* Decrement the loop counter */
    blkCnt--;
@ -130,7 +130,7 @@ void arm_scale_f32(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -141,7 +141,7 @@ void arm_scale_f32(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * scale */
    /* Scale the input and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_scale_q15.c
+++ b/Source/BasicMathFunctions/arm_scale_q15.c
@ -72,11 +72,11 @@ void arm_scale_q15(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Reading 2 inputs from memory */
    inA1 = *__SIMD32(pSrc)++;
@ -112,9 +112,9 @@ void arm_scale_q15(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * scale */
    /* Scale the input and then store the result in the destination buffer. */
@ -131,7 +131,7 @@ void arm_scale_q15(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * scale */
    /* Scale the input and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_scale_q31.c
+++ b/Source/BasicMathFunctions/arm_scale_q31.c
@ -73,13 +73,13 @@ void arm_scale_q31(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

-  if (sign == 0u)
+  if (sign == 0U)
  {
    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* read four inputs from source */
      in1 = *pSrc;
@ -121,8 +121,8 @@ void arm_scale_q31(
      *(pDst + 3) = out4;

      /* Update pointers to process next sampels */
-      pSrc += 4u;
-      pDst += 4u;
+      pSrc += 4U;
+      pDst += 4U;

      /* Decrement the loop counter */
      blkCnt--;
@ -133,7 +133,7 @@ void arm_scale_q31(
  {
    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* read four inputs from source */
      in1 = *pSrc;
@ -162,8 +162,8 @@ void arm_scale_q31(
      *(pDst + 3) = out4;

      /* Update pointers to process next sampels */
-      pSrc += 4u;
-      pDst += 4u;
+      pSrc += 4U;
+      pDst += 4U;

      /* Decrement the loop counter */
      blkCnt--;
@ -171,7 +171,7 @@ void arm_scale_q31(
  }
  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -184,7 +184,7 @@ void arm_scale_q31(

  if (sign == 0)
  {
-	  while (blkCnt > 0u)
+	  while (blkCnt > 0U)
 	  {
 		/* C = A * scale */
 		/* Scale the input and then store the result in the destination buffer. */
@ -204,7 +204,7 @@ void arm_scale_q31(
  }
  else
  {
-	  while (blkCnt > 0u)
+	  while (blkCnt > 0U)
 	  {
 		/* C = A * scale */
 		/* Scale the input and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_scale_q7.c
+++ b/Source/BasicMathFunctions/arm_scale_q7.c
@ -69,12 +69,12 @@ void arm_scale_q7(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;


  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* Reading 4 inputs from memory */
    in1 = *pSrc++;
@ -99,9 +99,9 @@ void arm_scale_q7(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * scale */
    /* Scale the input and then store the result in the destination buffer. */
@ -118,7 +118,7 @@ void arm_scale_q7(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A * scale */
    /* Scale the input and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_shift_q15.c
+++ b/Source/BasicMathFunctions/arm_shift_q15.c
@ -68,17 +68,17 @@ void arm_shift_q15(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* Getting the sign of shiftBits */
  sign = (shiftBits & 0x80);

  /* If the shift value is positive then do right shift else left shift */
-  if (sign == 0u)
+  if (sign == 0U)
  {
    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Read 2 inputs */
      in1 = *pSrc++;
@ -118,9 +118,9 @@ void arm_shift_q15(

    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    blkCnt = blockSize % 0x4u;
+    blkCnt = blockSize % 0x4U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A << shiftBits */
      /* Shift and then store the results in the destination buffer. */
@ -134,7 +134,7 @@ void arm_shift_q15(
  {
    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Read 2 inputs */
      in1 = *pSrc++;
@ -175,9 +175,9 @@ void arm_shift_q15(

    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    blkCnt = blockSize % 0x4u;
+    blkCnt = blockSize % 0x4U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A >> shiftBits */
      /* Shift the inputs and then store the results in the destination buffer. */
@ -196,12 +196,12 @@ void arm_shift_q15(
  sign = (shiftBits & 0x80);

  /* If the shift value is positive then do right shift else left shift */
-  if (sign == 0u)
+  if (sign == 0U)
  {
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A << shiftBits */
      /* Shift and then store the results in the destination buffer. */
@ -216,7 +216,7 @@ void arm_shift_q15(
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A >> shiftBits */
      /* Shift the inputs and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_shift_q31.c
+++ b/Source/BasicMathFunctions/arm_shift_q31.c
@ -84,14 +84,14 @@ void arm_shift_q31(
  q31_t out1, out2, out3, out4;                  /* Temporary output variables */

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;


-  if (sign == 0u)
+  if (sign == 0U)
  {
    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A  << shiftBits */
      /* Shift the input and then store the results in the destination buffer. */
@ -122,8 +122,8 @@ void arm_shift_q31(
      *(pDst + 3) = out4;

      /* Update destination pointer to process next sampels */
-      pSrc += 4u;
-      pDst += 4u;
+      pSrc += 4U;
+      pDst += 4U;

      /* Decrement the loop counter */
      blkCnt--;
@ -134,7 +134,7 @@ void arm_shift_q31(

    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A >>  shiftBits */
      /* Shift the input and then store the results in the destination buffer. */
@ -149,8 +149,8 @@ void arm_shift_q31(
      *(pDst + 3) = (in4 >> -shiftBits);


-      pSrc += 4u;
-      pDst += 4u;
+      pSrc += 4U;
+      pDst += 4U;

      blkCnt--;
    }
@ -159,7 +159,7 @@ void arm_shift_q31(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -172,11 +172,11 @@ void arm_shift_q31(
 #endif /* #if defined (ARM_MATH_DSP) */


-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A (>> or <<) shiftBits */
    /* Shift the input and then store the result in the destination buffer. */
-    *pDst++ = (sign == 0u) ? clip_q63_to_q31((q63_t) * pSrc++ << shiftBits) :
+    *pDst++ = (sign == 0U) ? clip_q63_to_q31((q63_t) * pSrc++ << shiftBits) :
      (*pSrc++ >> -shiftBits);

    /* Decrement the loop counter */
--- a/Source/BasicMathFunctions/arm_shift_q7.c
+++ b/Source/BasicMathFunctions/arm_shift_q7.c
@ -75,17 +75,17 @@ void arm_shift_q7(


  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* Getting the sign of shiftBits */
  sign = (shiftBits & 0x80);

  /* If the shift value is positive then do right shift else left shift */
-  if (sign == 0u)
+  if (sign == 0U)
  {
    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A << shiftBits */
      /* Read 4 inputs */
@ -100,7 +100,7 @@ void arm_shift_q7(
                                   __SSAT((in3 << shiftBits), 8),
                                   __SSAT((in4 << shiftBits), 8));
      /* Update source pointer to process next samples */
-      pSrc += 4u;
+      pSrc += 4U;

      /* Decrement the loop counter */
      blkCnt--;
@ -108,9 +108,9 @@ void arm_shift_q7(

    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    blkCnt = blockSize % 0x4u;
+    blkCnt = blockSize % 0x4U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A << shiftBits */
      /* Shift the input and then store the result in the destination buffer. */
@ -125,7 +125,7 @@ void arm_shift_q7(
    shiftBits = -shiftBits;
    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A >> shiftBits */
      /* Read 4 inputs */
@ -139,7 +139,7 @@ void arm_shift_q7(
                                   (in3 >> shiftBits), (in4 >> shiftBits));


-      pSrc += 4u;
+      pSrc += 4U;

      /* Decrement the loop counter */
      blkCnt--;
@ -147,9 +147,9 @@ void arm_shift_q7(

    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    blkCnt = blockSize % 0x4u;
+    blkCnt = blockSize % 0x4U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A >> shiftBits */
      /* Shift the input and then store the result in the destination buffer. */
@ -169,12 +169,12 @@ void arm_shift_q7(
  sign = (shiftBits & 0x80);

  /* If the shift value is positive then do left shift else right shift */
-  if (sign == 0u)
+  if (sign == 0U)
  {
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A << shiftBits */
      /* Shift the input and then store the result in the destination buffer. */
@ -189,7 +189,7 @@ void arm_shift_q7(
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* C = A >> shiftBits */
      /* Shift the input and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_sub_f32.c
+++ b/Source/BasicMathFunctions/arm_sub_f32.c
@ -74,11 +74,11 @@ void arm_sub_f32(
  float32_t inB1, inB2, inB3, inB4;              /* temporary variables */

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the results in the destination buffer. */
@ -101,9 +101,9 @@ void arm_sub_f32(


    /* Update pointers to process next samples */
-    pSrcA += 4u;
-    pSrcB += 4u;
-    pDst += 4u;
+    pSrcA += 4U;
+    pSrcB += 4U;
+    pDst += 4U;

    /* Decrement the loop counter */
    blkCnt--;
@ -111,7 +111,7 @@ void arm_sub_f32(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

 #else

@ -122,7 +122,7 @@ void arm_sub_f32(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the results in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_sub_q15.c
+++ b/Source/BasicMathFunctions/arm_sub_q15.c
@ -67,11 +67,11 @@ void arm_sub_q15(
  q31_t inB1, inB2;

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the results in the destination buffer two samples at a time. */
@ -89,9 +89,9 @@ void arm_sub_q15(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the result in the destination buffer. */
@ -108,7 +108,7 @@ void arm_sub_q15(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_sub_q31.c
+++ b/Source/BasicMathFunctions/arm_sub_q31.c
@ -67,11 +67,11 @@ void arm_sub_q31(
  q31_t inB1, inB2, inB3, inB4;

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the results in the destination buffer. */
@ -96,9 +96,9 @@ void arm_sub_q31(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the result in the destination buffer. */
@ -115,7 +115,7 @@ void arm_sub_q31(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the result in the destination buffer. */
--- a/Source/BasicMathFunctions/arm_sub_q7.c
+++ b/Source/BasicMathFunctions/arm_sub_q7.c
@ -64,11 +64,11 @@ void arm_sub_q7(
 /* Run the below code for Cortex-M4 and Cortex-M3 */

  /*loop Unrolling */
-  blkCnt = blockSize >> 2u;
+  blkCnt = blockSize >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the results in the destination buffer 4 samples at a time. */
@ -80,9 +80,9 @@ void arm_sub_q7(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = blockSize % 0x4u;
+  blkCnt = blockSize % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the result in the destination buffer. */
@ -99,7 +99,7 @@ void arm_sub_q7(
  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C = A - B */
    /* Subtract and then store the result in the destination buffer. */
--- a/Source/CommonTables/arm_const_structs.c
+++ b/Source/CommonTables/arm_const_structs.c
@ -143,49 +143,49 @@ const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096 = {
 /* Floating-point structs */
 const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len32 = {
 	{ 16, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_16_TABLE_LENGTH },
-	32u,
+	32U,
 	(float32_t *)twiddleCoef_rfft_32
 };

 const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len64 = {
 	 { 32, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_32_TABLE_LENGTH },
-	64u,
+	64U,
 	(float32_t *)twiddleCoef_rfft_64
 };

 const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len128 = {
 	{ 64, twiddleCoef_64, armBitRevIndexTable64, ARMBITREVINDEXTABLE_64_TABLE_LENGTH },
-	128u,
+	128U,
 	(float32_t *)twiddleCoef_rfft_128
 };

 const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len256 = {
 	{ 128, twiddleCoef_128, armBitRevIndexTable128, ARMBITREVINDEXTABLE_128_TABLE_LENGTH },
-	256u,
+	256U,
 	(float32_t *)twiddleCoef_rfft_256
 };

 const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len512 = {
 	{ 256, twiddleCoef_256, armBitRevIndexTable256, ARMBITREVINDEXTABLE_256_TABLE_LENGTH },
-	512u,
+	512U,
 	(float32_t *)twiddleCoef_rfft_512
 };

 const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len1024 = {
 	{ 512, twiddleCoef_512, armBitRevIndexTable512, ARMBITREVINDEXTABLE_512_TABLE_LENGTH },
-	1024u,
+	1024U,
 	(float32_t *)twiddleCoef_rfft_1024
 };

 const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len2048 = {
 	{ 1024, twiddleCoef_1024, armBitRevIndexTable1024, ARMBITREVINDEXTABLE_1024_TABLE_LENGTH },
-	2048u,
+	2048U,
 	(float32_t *)twiddleCoef_rfft_2048
 };

 const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len4096 = {
 	{ 2048, twiddleCoef_2048, armBitRevIndexTable2048, ARMBITREVINDEXTABLE_2048_TABLE_LENGTH },
-	4096u,
+	4096U,
 	(float32_t *)twiddleCoef_rfft_4096
 };

@ -195,90 +195,90 @@ extern const q31_t realCoefAQ31[8192];
 extern const q31_t realCoefBQ31[8192];

 const arm_rfft_instance_q31 arm_rfft_sR_q31_len32 = {
-	32u,
+	32U,
 	0,
 	1,
-	256u,
+	256U,
 	(q31_t*)realCoefAQ31,
 	(q31_t*)realCoefBQ31,
 	&arm_cfft_sR_q31_len16
 };

 const arm_rfft_instance_q31 arm_rfft_sR_q31_len64 = {
-	64u,
+	64U,
 	0,
 	1,
-	128u,
+	128U,
 	(q31_t*)realCoefAQ31,
 	(q31_t*)realCoefBQ31,
 	&arm_cfft_sR_q31_len32
 };

 const arm_rfft_instance_q31 arm_rfft_sR_q31_len128 = {
-	128u,
+	128U,
 	0,
 	1,
-	64u,
+	64U,
 	(q31_t*)realCoefAQ31,
 	(q31_t*)realCoefBQ31,
 	&arm_cfft_sR_q31_len64
 };

 const arm_rfft_instance_q31 arm_rfft_sR_q31_len256 = {
-	256u,
+	256U,
 	0,
 	1,
-	32u,
+	32U,
 	(q31_t*)realCoefAQ31,
 	(q31_t*)realCoefBQ31,
 	&arm_cfft_sR_q31_len128
 };

 const arm_rfft_instance_q31 arm_rfft_sR_q31_len512 = {
-	512u,
+	512U,
 	0,
 	1,
-	16u,
+	16U,
 	(q31_t*)realCoefAQ31,
 	(q31_t*)realCoefBQ31,
 	&arm_cfft_sR_q31_len256
 };

 const arm_rfft_instance_q31 arm_rfft_sR_q31_len1024 = {
-	1024u,
+	1024U,
 	0,
 	1,
-	8u,
+	8U,
 	(q31_t*)realCoefAQ31,
 	(q31_t*)realCoefBQ31,
 	&arm_cfft_sR_q31_len512
 };

 const arm_rfft_instance_q31 arm_rfft_sR_q31_len2048 = {
-	2048u,
+	2048U,
 	0,
 	1,
-	4u,
+	4U,
 	(q31_t*)realCoefAQ31,
 	(q31_t*)realCoefBQ31,
 	&arm_cfft_sR_q31_len1024
 };

 const arm_rfft_instance_q31 arm_rfft_sR_q31_len4096 = {
-	4096u,
+	4096U,
 	0,
 	1,
-	2u,
+	2U,
 	(q31_t*)realCoefAQ31,
 	(q31_t*)realCoefBQ31,
 	&arm_cfft_sR_q31_len2048
 };

 const arm_rfft_instance_q31 arm_rfft_sR_q31_len8192 = {
-	8192u,
+	8192U,
 	0,
 	1,
-	1u,
+	1U,
 	(q31_t*)realCoefAQ31,
 	(q31_t*)realCoefBQ31,
 	&arm_cfft_sR_q31_len4096
@ -289,90 +289,90 @@ extern const q15_t realCoefAQ15[8192];
 extern const q15_t realCoefBQ15[8192];

 const arm_rfft_instance_q15 arm_rfft_sR_q15_len32 = {
-	32u,
+	32U,
 	0,
 	1,
-	256u,
+	256U,
 	(q15_t*)realCoefAQ15,
 	(q15_t*)realCoefBQ15,
 	&arm_cfft_sR_q15_len16
 };

 const arm_rfft_instance_q15 arm_rfft_sR_q15_len64 = {
-	64u,
+	64U,
 	0,
 	1,
-	128u,
+	128U,
 	(q15_t*)realCoefAQ15,
 	(q15_t*)realCoefBQ15,
 	&arm_cfft_sR_q15_len32
 };

 const arm_rfft_instance_q15 arm_rfft_sR_q15_len128 = {
-	128u,
+	128U,
 	0,
 	1,
-	64u,
+	64U,
 	(q15_t*)realCoefAQ15,
 	(q15_t*)realCoefBQ15,
 	&arm_cfft_sR_q15_len64
 };

 const arm_rfft_instance_q15 arm_rfft_sR_q15_len256 = {
-	256u,
+	256U,
 	0,
 	1,
-	32u,
+	32U,
 	(q15_t*)realCoefAQ15,
 	(q15_t*)realCoefBQ15,
 	&arm_cfft_sR_q15_len128
 };

 const arm_rfft_instance_q15 arm_rfft_sR_q15_len512 = {
-	512u,
+	512U,
 	0,
 	1,
-	16u,
+	16U,
 	(q15_t*)realCoefAQ15,
 	(q15_t*)realCoefBQ15,
 	&arm_cfft_sR_q15_len256
 };

 const arm_rfft_instance_q15 arm_rfft_sR_q15_len1024 = {
-	1024u,
+	1024U,
 	0,
 	1,
-	8u,
+	8U,
 	(q15_t*)realCoefAQ15,
 	(q15_t*)realCoefBQ15,
 	&arm_cfft_sR_q15_len512
 };

 const arm_rfft_instance_q15 arm_rfft_sR_q15_len2048 = {
-	2048u,
+	2048U,
 	0,
 	1,
-	4u,
+	4U,
 	(q15_t*)realCoefAQ15,
 	(q15_t*)realCoefBQ15,
 	&arm_cfft_sR_q15_len1024
 };

 const arm_rfft_instance_q15 arm_rfft_sR_q15_len4096 = {
-	4096u,
+	4096U,
 	0,
 	1,
-	2u,
+	2U,
 	(q15_t*)realCoefAQ15,
 	(q15_t*)realCoefBQ15,
 	&arm_cfft_sR_q15_len2048
 };

 const arm_rfft_instance_q15 arm_rfft_sR_q15_len8192 = {
-	8192u,
+	8192U,
 	0,
 	1,
-	1u,
+	1U,
 	(q15_t*)realCoefAQ15,
 	(q15_t*)realCoefBQ15,
 	&arm_cfft_sR_q15_len4096
--- a/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c
@ -82,11 +82,11 @@ void arm_cmplx_conj_f32(
  float32_t inI1, inI2, inI3, inI4;

  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
    /* Calculate Complex Conjugate and then store the results in the destination buffer. */
@ -131,13 +131,13 @@ void arm_cmplx_conj_f32(
    pDst[5] = inI3;

    /* increment source pointer by 8 to process next samples */
-    pSrc += 8u;
+    pSrc += 8U;

    /* store imaginary sample to destination */
    pDst[7] = inI4;

    /* increment destination pointer by 8 to store next samples */
-    pDst += 8u;
+    pDst += 8U;

    /* Decrement the loop counter */
    blkCnt--;
@ -145,7 +145,7 @@ void arm_cmplx_conj_f32(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

 #else

@ -154,7 +154,7 @@ void arm_cmplx_conj_f32(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* realOut + j (imagOut) = realIn + j (-1) imagIn */
    /* Calculate Complex Conjugate and then store the results in the destination buffer. */
--- a/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c
@ -64,11 +64,11 @@ void arm_cmplx_conj_q15(
  q31_t zero = 0;

  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
    /* Calculate Complex Conjugate and then store the results in the destination buffer. */
@ -109,9 +109,9 @@ void arm_cmplx_conj_q15(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
    /* Calculate Complex Conjugate and then store the results in the destination buffer. */
@ -128,7 +128,7 @@ void arm_cmplx_conj_q15(

  /* Run the below code for Cortex-M0 */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
    /* realOut + j (imagOut) = realIn+ j (-1) imagIn */
    /* Calculate Complex Conjugate and then store the results in the destination buffer. */
--- a/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c
@ -65,11 +65,11 @@ void arm_cmplx_conj_q31(
  q31_t inI1, inI2, inI3, inI4;                  /* Temporary imaginary variables */

  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
    /* Calculate Complex Conjugate and then store the results in the destination buffer. */
@ -125,14 +125,14 @@ void arm_cmplx_conj_q31(
    pDst[3] = inI2;

    /* increment source pointer by 8 to process next samples */
-    pSrc += 8u;
+    pSrc += 8U;

    /* store imaginary input samples */
    pDst[5] = inI3;
    pDst[7] = inI4;

    /* increment destination pointer by 8 to process next samples */
-    pDst += 8u;
+    pDst += 8U;

    /* Decrement the loop counter */
    blkCnt--;
@ -140,7 +140,7 @@ void arm_cmplx_conj_q31(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

 #else

@ -150,7 +150,7 @@ void arm_cmplx_conj_q31(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
    /* Calculate Complex Conjugate and then store the results in the destination buffer. */
--- a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c
@ -89,11 +89,11 @@ void arm_cmplx_dot_prod_f32(
  uint32_t blkCnt;                               /* loop counter */

  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
      a0 = *pSrcA++;
      b0 = *pSrcA++;
@ -141,9 +141,9 @@ void arm_cmplx_dot_prod_f32(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples & 0x3u;
+  blkCnt = numSamples & 0x3U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
      a0 = *pSrcA++;
      b0 = *pSrcA++;
@ -163,7 +163,7 @@ void arm_cmplx_dot_prod_f32(

  /* Run the below code for Cortex-M0 */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
      a0 = *pSrcA++;
      b0 = *pSrcA++;
--- a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c
@ -72,11 +72,11 @@ void arm_cmplx_dot_prod_q15(


  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
      a0 = *pSrcA++;
      b0 = *pSrcA++;
@ -124,9 +124,9 @@ void arm_cmplx_dot_prod_q15(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
      a0 = *pSrcA++;
      b0 = *pSrcA++;
@ -146,7 +146,7 @@ void arm_cmplx_dot_prod_q15(

  /* Run the below code for Cortex-M0 */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
      a0 = *pSrcA++;
      b0 = *pSrcA++;
--- a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c
@ -73,11 +73,11 @@ void arm_cmplx_dot_prod_q31(


  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
      a0 = *pSrcA++;
      b0 = *pSrcA++;
@ -125,9 +125,9 @@ void arm_cmplx_dot_prod_q31(

  /* If the numSamples  is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
      a0 = *pSrcA++;
      b0 = *pSrcA++;
@ -147,7 +147,7 @@ void arm_cmplx_dot_prod_q31(

  /* Run the below code for Cortex-M0 */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
      a0 = *pSrcA++;
      b0 = *pSrcA++;
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c
@ -82,11 +82,11 @@ void arm_cmplx_mag_f32(
  uint32_t blkCnt;                               /* loop counter */

  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {

    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
@ -114,9 +114,9 @@ void arm_cmplx_mag_f32(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
    realIn = *pSrc++;
@ -132,7 +132,7 @@ void arm_cmplx_mag_f32(

  /* Run the below code for Cortex-M0 */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
    /* out = sqrt((real * real) + (imag * imag)) */
    realIn = *pSrc++;
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c
@ -66,11 +66,11 @@ void arm_cmplx_mag_q15(


  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {

    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
@ -96,9 +96,9 @@ void arm_cmplx_mag_q15(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
    in1 = *__SIMD32(pSrc)++;
@ -116,7 +116,7 @@ void arm_cmplx_mag_q15(
  /* Run the below code for Cortex-M0 */
  q15_t real, imag;                              /* Temporary variables to hold input values */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
    /* out = sqrt(real * real + imag * imag) */
    real = *pSrc++;
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c
@ -68,11 +68,11 @@ void arm_cmplx_mag_q31(


  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* read complex input from source buffer */
    real1 = pSrc[0];
@ -130,13 +130,13 @@ void arm_cmplx_mag_q31(
    arm_sqrt_q31(out1, &pDst[2]);

    /* increment source by 8 to process next samples */
-    pSrc += 8u;
+    pSrc += 8U;

    /* calculate square root */
    arm_sqrt_q31(out3, &pDst[3]);

    /* increment destination by 4 to process next samples */
-    pDst += 4u;
+    pDst += 4U;

    /* Decrement the loop counter */
    blkCnt--;
@ -144,7 +144,7 @@ void arm_cmplx_mag_q31(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

 #else

@ -153,7 +153,7 @@ void arm_cmplx_mag_q31(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
    real = *pSrc++;
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c
@ -86,11 +86,11 @@ void arm_cmplx_mag_squared_f32(
  float32_t out1, out2, out3, out4;              /* Temporary variables to hold output values */

  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    /* read real input sample from source buffer */
@ -160,13 +160,13 @@ void arm_cmplx_mag_squared_f32(
    pDst[2] = out3;

    /* increment source pointer by 8 to process next samples */
-    pSrc += 8u;
+    pSrc += 8U;

    /* store output to destination */
    pDst[3] = out4;

    /* increment destination pointer by 4 to process next samples */
-    pDst += 4u;
+    pDst += 4U;

    /* Decrement the loop counter */
    blkCnt--;
@ -174,7 +174,7 @@ void arm_cmplx_mag_squared_f32(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

 #else

@ -184,7 +184,7 @@ void arm_cmplx_mag_squared_f32(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    real = *pSrc++;
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c
@ -64,11 +64,11 @@ void arm_cmplx_mag_squared_q15(
  q31_t acc2, acc3;

  /*loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    in1 = *__SIMD32(pSrc)++;
@ -93,9 +93,9 @@ void arm_cmplx_mag_squared_q15(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    in1 = *__SIMD32(pSrc)++;
@ -113,7 +113,7 @@ void arm_cmplx_mag_squared_q15(
  /* Run the below code for Cortex-M0 */
  q15_t real, imag;                              /* Temporary variables to store real and imaginary values */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
    /* out = ((real * real) + (imag * imag)) */
    real = *pSrc++;
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c
@ -65,11 +65,11 @@ void arm_cmplx_mag_squared_q31(
  uint32_t blkCnt;                               /* loop counter */

  /* loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    real = *pSrc++;
@ -106,9 +106,9 @@ void arm_cmplx_mag_squared_q31(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    real = *pSrc++;
@ -126,7 +126,7 @@ void arm_cmplx_mag_squared_q31(

  /* Run the below code for Cortex-M0 */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
    /* out = ((real * real) + (imag * imag)) */
    real = *pSrc++;
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c
@ -86,11 +86,11 @@ void arm_cmplx_mult_cmplx_f32(


  /* loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
@ -145,18 +145,18 @@ void arm_cmplx_mult_cmplx_f32(
    acc4 = (b2 * c2);

    *(pDst + 4) = acc1;
-    pSrcA += 8u;
+    pSrcA += 8U;

    acc3 -= (b2 * d2);
    acc4 += (a2 * d2);

    *(pDst + 5) = acc2;
-    pSrcB += 8u;
+    pSrcB += 8U;

    *(pDst + 6) = acc3;
    *(pDst + 7) = acc4;

-    pDst += 8u;
+    pDst += 8U;

    /* Decrement the numSamples loop counter */
    blkCnt--;
@ -164,7 +164,7 @@ void arm_cmplx_mult_cmplx_f32(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

 #else

@ -173,7 +173,7 @@ void arm_cmplx_mult_cmplx_f32(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c
@ -64,11 +64,11 @@ void arm_cmplx_mult_cmplx_q15(
  uint32_t blkCnt;                               /* loop counters */

  /* loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
@ -126,9 +126,9 @@ void arm_cmplx_mult_cmplx_q15(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
@ -152,7 +152,7 @@ void arm_cmplx_mult_cmplx_q15(

  /* Run the below code for Cortex-M0 */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c
@ -68,11 +68,11 @@ void arm_cmplx_mult_cmplx_q31(
  /* Run the below code for Cortex-M4 and Cortex-M3 */

  /* loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
@ -174,9 +174,9 @@ void arm_cmplx_mult_cmplx_q31(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
@ -212,11 +212,11 @@ void arm_cmplx_mult_cmplx_q31(
  /* Run the below code for Cortex-M0 */

  /* loop Unrolling */
-  blkCnt = numSamples >> 1u;
+  blkCnt = numSamples >> 1U;

  /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.
   ** a second loop below computes the remaining 1 sample. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
@ -272,9 +272,9 @@ void arm_cmplx_mult_cmplx_q31(

  /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x2u;
+  blkCnt = numSamples % 0x2U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
@ -89,11 +89,11 @@ void arm_cmplx_mult_real_f32(
  float32_t out5, out6, out7, out8;              /* Temporary variables to hold output data */

  /* loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[i].            */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i].        */
@ -164,20 +164,20 @@ void arm_cmplx_mult_real_f32(
    pCmplxDst[4] = out5;

    /* incremnet complex input buffer by 8 to process next samples */
-    pSrcCmplx += 8u;
+    pSrcCmplx += 8U;

    /* store result to destination bufer */
    pCmplxDst[5] = out6;

    /* increment real input buffer by 4 to process next samples */
-    pSrcReal += 4u;
+    pSrcReal += 4U;

    /* store result to destination bufer */
    pCmplxDst[6] = out7;
    pCmplxDst[7] = out8;

    /* increment destination buffer by 8 to process next sampels */
-    pCmplxDst += 8u;
+    pCmplxDst += 8U;

    /* Decrement the numSamples loop counter */
    blkCnt--;
@ -185,7 +185,7 @@ void arm_cmplx_mult_real_f32(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

 #else

@ -194,7 +194,7 @@ void arm_cmplx_mult_real_f32(

 #endif /* #if defined (ARM_MATH_DSP) */

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[i].            */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i].        */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c
@ -70,11 +70,11 @@ void arm_cmplx_mult_real_q15(
  q31_t mul1, mul2, mul3, mul4;                  /* Temporary variables to hold intermediate data */

  /* loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[i].            */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i].        */
@ -103,10 +103,10 @@ void arm_cmplx_mult_real_q15(
 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */

    /* saturate the result */
-    out1 = (q15_t) __SSAT(mul1 >> 15u, 16);
-    out2 = (q15_t) __SSAT(mul2 >> 15u, 16);
-    out3 = (q15_t) __SSAT(mul3 >> 15u, 16);
-    out4 = (q15_t) __SSAT(mul4 >> 15u, 16);
+    out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
+    out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
+    out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
+    out4 = (q15_t) __SSAT(mul4 >> 15U, 16);

    /* pack real and imaginary outputs and store them to destination */
    *__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
@ -132,10 +132,10 @@ void arm_cmplx_mult_real_q15(

 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-    out1 = (q15_t) __SSAT(mul1 >> 15u, 16);
-    out2 = (q15_t) __SSAT(mul2 >> 15u, 16);
-    out3 = (q15_t) __SSAT(mul3 >> 15u, 16);
-    out4 = (q15_t) __SSAT(mul4 >> 15u, 16);
+    out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
+    out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
+    out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
+    out4 = (q15_t) __SSAT(mul4 >> 15U, 16);

    *__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
    *__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
@ -146,9 +146,9 @@ void arm_cmplx_mult_real_q15(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[i].            */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i].        */
@ -167,7 +167,7 @@ void arm_cmplx_mult_real_q15(

  /* Run the below code for Cortex-M0 */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
    /* realOut = realA * realB.            */
    /* imagOut = imagA * realB.                */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c
@ -69,11 +69,11 @@ void arm_cmplx_mult_real_q31(
  q31_t out1, out2, out3, out4;                  /* Temporary variables to hold output data */

  /* loop Unrolling */
-  blkCnt = numSamples >> 2u;
+  blkCnt = numSamples >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[i].            */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i].        */
@ -151,9 +151,9 @@ void arm_cmplx_mult_real_q31(

  /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  blkCnt = numSamples % 0x4u;
+  blkCnt = numSamples % 0x4U;

-  while (blkCnt > 0u)
+  while (blkCnt > 0U)
  {
    /* C[2 * i] = A[2 * i] * B[i].            */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i].        */
@ -187,7 +187,7 @@ void arm_cmplx_mult_real_q31(

  /* Run the below code for Cortex-M0 */

-  while (numSamples > 0u)
+  while (numSamples > 0U)
  {
    /* realOut = realA * realB.            */
    /* imagReal = imagA * realB.               */
--- a/Source/ControllerFunctions/arm_pid_init_f32.c
+++ b/Source/ControllerFunctions/arm_pid_init_f32.c
@ -64,7 +64,7 @@ void arm_pid_init_f32(
  if (resetStateFlag)
  {
    /* Clear the state buffer.  The size will be always 3 samples */
-    memset(S->state, 0, 3u * sizeof(float32_t));
+    memset(S->state, 0, 3U * sizeof(float32_t));
  }

 }
--- a/Source/ControllerFunctions/arm_pid_init_q15.c
+++ b/Source/ControllerFunctions/arm_pid_init_q15.c
@ -74,7 +74,7 @@ void arm_pid_init_q15(
  if (resetStateFlag)
  {
    /* Clear the state buffer.  The size will be always 3 samples */
-    memset(S->state, 0, 3u * sizeof(q15_t));
+    memset(S->state, 0, 3U * sizeof(q15_t));
  }

 #else
@ -98,7 +98,7 @@ void arm_pid_init_q15(
  if (resetStateFlag)
  {
    /* Clear the state buffer.  The size will be always 3 samples */
-    memset(S->state, 0, 3u * sizeof(q15_t));
+    memset(S->state, 0, 3U * sizeof(q15_t));
  }

 #endif /* #if defined (ARM_MATH_DSP) */
--- a/Source/ControllerFunctions/arm_pid_init_q31.c
+++ b/Source/ControllerFunctions/arm_pid_init_q31.c
@ -85,7 +85,7 @@ void arm_pid_init_q31(
  if (resetStateFlag)
  {
    /* Clear the state buffer.  The size will be always 3 samples */
-    memset(S->state, 0, 3u * sizeof(q31_t));
+    memset(S->state, 0, 3U * sizeof(q31_t));
  }

 }
--- a/Source/ControllerFunctions/arm_pid_reset_f32.c
+++ b/Source/ControllerFunctions/arm_pid_reset_f32.c
@ -45,7 +45,7 @@ void arm_pid_reset_f32(
 {

  /* Clear the state buffer.  The size will be always 3 samples */
-  memset(S->state, 0, 3u * sizeof(float32_t));
+  memset(S->state, 0, 3U * sizeof(float32_t));
 }

 /**
--- a/Source/ControllerFunctions/arm_pid_reset_q15.c
+++ b/Source/ControllerFunctions/arm_pid_reset_q15.c
@ -44,7 +44,7 @@ void arm_pid_reset_q15(
  arm_pid_instance_q15 * S)
 {
  /* Reset state to zero, The size will be always 3 samples */
-  memset(S->state, 0, 3u * sizeof(q15_t));
+  memset(S->state, 0, 3U * sizeof(q15_t));
 }

 /**
--- a/Source/ControllerFunctions/arm_pid_reset_q31.c
+++ b/Source/ControllerFunctions/arm_pid_reset_q31.c
@ -45,7 +45,7 @@ void arm_pid_reset_q31(
 {

  /* Clear the state buffer.  The size will be always 3 samples */
-  memset(S->state, 0, 3u * sizeof(q31_t));
+  memset(S->state, 0, 3U * sizeof(q31_t));
 }

 /**
--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_init_q31.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_init_q31.c
@ -87,7 +87,7 @@ void arm_biquad_cas_df1_32x64_init_q31(
  S->pCoeffs = pCoeffs;

  /* Clear state buffer and size is always 4 * numStages */
-  memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q63_t));
+  memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q63_t));

  /* Assign state pointer */
  S->pState = pState;
--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c
@ -190,8 +190,8 @@ void arm_biquad_cas_df1_32x64_q31(
  int32_t shift = (int32_t) S->postShift + 1;    /*  Shift to be applied to the output */
  uint32_t sample, stage = S->numStages;         /*  loop counters                     */
  q31_t acc_l, acc_h;                            /*  temporary output               */
-  uint32_t uShift = ((uint32_t) S->postShift + 1u);
-  uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
+  uint32_t uShift = ((uint32_t) S->postShift + 1U);
+  uint32_t lShift = 32U - uShift;                /*  Shift to be applied to the output */


 #if defined (ARM_MATH_DSP)
@ -219,11 +219,11 @@ void arm_biquad_cas_df1_32x64_q31(
     * acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
     */

-    sample = blockSize >> 2u;
+    sample = blockSize >> 2U;

    /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
@ -297,7 +297,7 @@ void arm_biquad_cas_df1_32x64_q31(

      /* The result is converted to 1.31 */
      /* Store the output in the destination buffer. */
-      *(pOut + 1u) = acc_h;
+      *(pOut + 1U) = acc_h;

      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */

@ -329,7 +329,7 @@ void arm_biquad_cas_df1_32x64_q31(
      acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;

      /* Store the output in the destination buffer in 1.31 format. */
-      *(pOut + 2u) = acc_h;
+      *(pOut + 2U) = acc_h;

      /* Read the fourth input into Xn, to reuse the value */
      Xn = *pIn++;
@ -363,7 +363,7 @@ void arm_biquad_cas_df1_32x64_q31(
      acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;

      /* Store the output in the destination buffer in 1.31 format. */
-      *(pOut + 3u) = acc_h;
+      *(pOut + 3U) = acc_h;

      /* Every time after the output is computed state should be updated. */
      /* The states should be updated as:  */
@ -375,7 +375,7 @@ void arm_biquad_cas_df1_32x64_q31(
      Xn1 = Xn;

      /* update output pointer */
-      pOut += 4u;
+      pOut += 4U;

      /* decrement the loop counter */
      sample--;
@ -383,9 +383,9 @@ void arm_biquad_cas_df1_32x64_q31(

    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    sample = (blockSize & 0x3u);
+    sample = (blockSize & 0x3U);

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
@ -476,7 +476,7 @@ void arm_biquad_cas_df1_32x64_q31(

    sample = blockSize;

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c
@ -207,11 +207,11 @@ void arm_biquad_cascade_df1_f32(
     *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1]   + a2 * y[n-2]
     */

-    sample = blockSize >> 2u;
+    sample = blockSize >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the first input */
      Xn = *pIn++;
@ -286,9 +286,9 @@ void arm_biquad_cascade_df1_f32(

    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    sample = blockSize & 0x3u;
+    sample = blockSize & 0x3U;

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
@ -331,7 +331,7 @@ void arm_biquad_cascade_df1_f32(
    /* decrement the loop counter */
    stage--;

-  } while (stage > 0u);
+  } while (stage > 0U);

 #else

@ -358,7 +358,7 @@ void arm_biquad_cascade_df1_f32(

    sample = blockSize;

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
@ -400,7 +400,7 @@ void arm_biquad_cascade_df1_f32(
    /* decrement the loop counter */
    stage--;

-  } while (stage > 0u);
+  } while (stage > 0U);

 #endif /* #if defined (ARM_MATH_DSP) */

--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q15.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q15.c
@ -104,11 +104,11 @@ void arm_biquad_cascade_df1_fast_q15(
     *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
     *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
     */
-    sample = blockSize >> 1u;
+    sample = blockSize >> 1U;

    /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.
     ** a second loop below computes the remaining 1 sample. */
-    while (sample > 0u)
+    while (sample > 0U)
    {

      /* Read the input */
@ -198,7 +198,7 @@ void arm_biquad_cascade_df1_fast_q15(
    /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
     ** No loop unrolling is used. */

-    if ((blockSize & 0x1u) != 0u)
+    if ((blockSize & 0x1U) != 0U)
    {
      /* Read the input */
      in = *pIn++;
@ -264,7 +264,7 @@ void arm_biquad_cascade_df1_fast_q15(
    /* Decrement the loop counter */
    stage--;

-  } while (stage > 0u);
+  } while (stage > 0U);
 }


--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q31.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_fast_q31.c
@ -100,11 +100,11 @@ void arm_biquad_cascade_df1_fast_q31(
     *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
     */

-    sample = blockSize >> 2u;
+    sample = blockSize >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn;
@ -130,7 +130,7 @@ void arm_biquad_cascade_df1_fast_q31(
      Yn2 = acc << shift;

      /* Read the second input */
-      Xn2 = *(pIn + 1u);
+      Xn2 = *(pIn + 1U);

      /* Store the output in the destination buffer. */
      *pOut = Yn2;
@ -156,10 +156,10 @@ void arm_biquad_cascade_df1_fast_q31(
      Yn1 = acc << shift;

      /* Read the third input  */
-      Xn1 = *(pIn + 2u);
+      Xn1 = *(pIn + 2U);

      /* Store the output in the destination buffer. */
-      *(pOut + 1u) = Yn1;
+      *(pOut + 1U) = Yn1;

      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
      /* acc =  b0 * x[n] */
@ -182,11 +182,11 @@ void arm_biquad_cascade_df1_fast_q31(
      Yn2 = acc << shift;

      /* Read the forth input */
-      Xn = *(pIn + 3u);
+      Xn = *(pIn + 3U);

      /* Store the output in the destination buffer. */
-      *(pOut + 2u) = Yn2;
-      pIn += 4u;
+      *(pOut + 2U) = Yn2;
+      pIn += 4U;

      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
      /* acc =  b0 * x[n] */
@ -217,8 +217,8 @@ void arm_biquad_cascade_df1_fast_q31(
      Xn1 = Xn;

      /* Store the output in the destination buffer. */
-      *(pOut + 3u) = Yn1;
-      pOut += 4u;
+      *(pOut + 3U) = Yn1;
+      pOut += 4U;

      /* decrement the loop counter */
      sample--;
@ -226,9 +226,9 @@ void arm_biquad_cascade_df1_fast_q31(

    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    sample = (blockSize & 0x3u);
+    sample = (blockSize & 0x3U);

-   while (sample > 0u)
+   while (sample > 0U)
   {
      /* Read the input */
      Xn = *pIn++;
--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f32.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_init_f32.c
@ -86,7 +86,7 @@ void arm_biquad_cascade_df1_init_f32(
  S->pCoeffs = pCoeffs;

  /* Clear state buffer and size is always 4 * numStages */
-  memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(float32_t));
+  memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(float32_t));

  /* Assign state pointer */
  S->pState = pState;
--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q15.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q15.c
@ -88,7 +88,7 @@ void arm_biquad_cascade_df1_init_q15(
  S->pCoeffs = pCoeffs;

  /* Clear state buffer and size is always 4 * numStages */
-  memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q15_t));
+  memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q15_t));

  /* Assign state pointer */
  S->pState = pState;
--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q31.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_init_q31.c
@ -87,7 +87,7 @@ void arm_biquad_cascade_df1_init_q31(
  S->pCoeffs = pCoeffs;

  /* Clear state buffer and size is always 4 * numStages */
-  memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q31_t));
+  memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q31_t));

  /* Assign state pointer */
  S->pState = pState;
--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c
@ -109,11 +109,11 @@ void arm_biquad_cascade_df1_q15(
     *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
     *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
     */
-    sample = blockSize >> 1u;
+    sample = blockSize >> 1U;

    /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.
     ** a second loop below computes the remaining 1 sample. */
-    while (sample > 0u)
+    while (sample > 0U)
    {

      /* Read the input */
@ -220,7 +220,7 @@ void arm_biquad_cascade_df1_q15(
    /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
     ** No loop unrolling is used. */

-    if ((blockSize & 0x1u) != 0u)
+    if ((blockSize & 0x1U) != 0U)
    {
      /* Read the input */
      in = *pIn++;
@ -295,7 +295,7 @@ void arm_biquad_cascade_df1_q15(
    /* Decrement the loop counter */
    stage--;

-  } while (stage > 0u);
+  } while (stage > 0U);

 #else

@ -334,7 +334,7 @@ void arm_biquad_cascade_df1_q15(

    sample = blockSize;

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_q31.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_q31.c
@ -65,8 +65,8 @@ void arm_biquad_cascade_df1_q31(
  uint32_t blockSize)
 {
  q63_t acc;                                     /*  accumulator                   */
-  uint32_t uShift = ((uint32_t) S->postShift + 1u);
-  uint32_t lShift = 32u - uShift;                /*  Shift to be applied to the output */
+  uint32_t uShift = ((uint32_t) S->postShift + 1U);
+  uint32_t lShift = 32U - uShift;                /*  Shift to be applied to the output */
  q31_t *pIn = pSrc;                             /*  input pointer initialization  */
  q31_t *pOut = pDst;                            /*  output pointer initialization */
  q31_t *pState = S->pState;                     /*  pState pointer initialization */
@ -104,11 +104,11 @@ void arm_biquad_cascade_df1_q31(
     *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
     */

-    sample = blockSize >> 2u;
+    sample = blockSize >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
     ** a second loop below computes the remaining 1 to 3 samples. */
-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
@ -246,9 +246,9 @@ void arm_biquad_cascade_df1_q31(

    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    sample = (blockSize & 0x3u);
+    sample = (blockSize & 0x3U);

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
@ -327,7 +327,7 @@ void arm_biquad_cascade_df1_q31(

    sample = blockSize;

-    while (sample > 0u)
+    while (sample > 0U)
    {
      /* Read the input */
      Xn = *pIn++;
--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c
@ -171,19 +171,19 @@ uint32_t blockSize)
      b2 = pCoeffs[2];
      a1 = pCoeffs[3];
      /* Apply loop unrolling and compute 16 output values simultaneously. */
-      sample = blockSize >> 4u;
+      sample = blockSize >> 4U;
      a2 = pCoeffs[4];

      /*Reading the state values */
      d1 = pState[0];
      d2 = pState[1];

-      pCoeffs += 5u;
+      pCoeffs += 5U;


      /* First part of the processing with loop unrolling.  Compute 16 outputs at a time.
       ** a second loop below computes the remaining 1 to 15 samples. */
-      while (sample > 0u) {
+      while (sample > 0U) {

         /* y[n] = b0 * x[n] + d1 */
         /* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -366,7 +366,7 @@ uint32_t blockSize)
      }

      sample = blockSize & 0xFu;
-      while (sample > 0u) {
+      while (sample > 0U) {
         Xn1 = *pIn;
         acc1 = b0 * Xn1 + d1;

@ -392,12 +392,12 @@ uint32_t blockSize)
      /* decrement the loop counter */
      stage--;

-      pState += 2u;
+      pState += 2U;

      /*Reset the output working pointer */
      pOut = pDst;

-   } while (stage > 0u);
+   } while (stage > 0U);

 #elif defined(ARM_MATH_CM0_FAMILY)

@ -419,7 +419,7 @@ uint32_t blockSize)

      sample = blockSize;

-      while (sample > 0u)
+      while (sample > 0U)
      {
         /* Read the input */
         Xn1 = *pIn++;
@ -454,7 +454,7 @@ uint32_t blockSize)
      /* decrement the loop counter */
      stage--;

-   } while (stage > 0u);
+   } while (stage > 0U);

 #else

@ -480,11 +480,11 @@ uint32_t blockSize)
      d2 = pState[1];

      /* Apply loop unrolling and compute 4 output values simultaneously. */
-      sample = blockSize >> 2u;
+      sample = blockSize >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-      while (sample > 0u) {
+      while (sample > 0U) {

         /* y[n] = b0 * x[n] + d1 */
         /* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -546,8 +546,8 @@ uint32_t blockSize)
         sample--;
      }

-      sample = blockSize & 0x3u;
-      while (sample > 0u) {
+      sample = blockSize & 0x3U;
+      while (sample > 0U) {
         Xn1 = *pIn++;

         p0 = b0 * Xn1;
@ -578,7 +578,7 @@ uint32_t blockSize)
      /* decrement the loop counter */
      stage--;

-   } while (stage > 0u);
+   } while (stage > 0U);

 #endif

--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f64.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f64.c
@ -171,19 +171,19 @@ uint32_t blockSize)
      b2 = pCoeffs[2];
      a1 = pCoeffs[3];
      /* Apply loop unrolling and compute 16 output values simultaneously. */
-      sample = blockSize >> 4u;
+      sample = blockSize >> 4U;
      a2 = pCoeffs[4];

      /*Reading the state values */
      d1 = pState[0];
      d2 = pState[1];

-      pCoeffs += 5u;
+      pCoeffs += 5U;


      /* First part of the processing with loop unrolling.  Compute 16 outputs at a time.
       ** a second loop below computes the remaining 1 to 15 samples. */
-      while (sample > 0u) {
+      while (sample > 0U) {

         /* y[n] = b0 * x[n] + d1 */
         /* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -366,7 +366,7 @@ uint32_t blockSize)
      }

      sample = blockSize & 0xFu;
-      while (sample > 0u) {
+      while (sample > 0U) {
         Xn1 = *pIn;
         acc1 = b0 * Xn1 + d1;

@ -392,12 +392,12 @@ uint32_t blockSize)
      /* decrement the loop counter */
      stage--;

-      pState += 2u;
+      pState += 2U;

      /*Reset the output working pointer */
      pOut = pDst;

-   } while (stage > 0u);
+   } while (stage > 0U);

 #elif defined(ARM_MATH_CM0_FAMILY)

@ -419,7 +419,7 @@ uint32_t blockSize)

      sample = blockSize;

-      while (sample > 0u)
+      while (sample > 0U)
      {
         /* Read the input */
         Xn1 = *pIn++;
@ -454,7 +454,7 @@ uint32_t blockSize)
      /* decrement the loop counter */
      stage--;

-   } while (stage > 0u);
+   } while (stage > 0U);

 #else

@ -480,11 +480,11 @@ uint32_t blockSize)
      d2 = pState[1];

      /* Apply loop unrolling and compute 4 output values simultaneously. */
-      sample = blockSize >> 2u;
+      sample = blockSize >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
-      while (sample > 0u) {
+      while (sample > 0U) {

         /* y[n] = b0 * x[n] + d1 */
         /* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -546,8 +546,8 @@ uint32_t blockSize)
         sample--;
      }

-      sample = blockSize & 0x3u;
-      while (sample > 0u) {
+      sample = blockSize & 0x3U;
+      while (sample > 0U) {
         Xn1 = *pIn++;

         p0 = b0 * Xn1;
@ -578,7 +578,7 @@ uint32_t blockSize)
      /* decrement the loop counter */
      stage--;

-   } while (stage > 0u);
+   } while (stage > 0U);

 #endif

--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c
@ -78,7 +78,7 @@ void arm_biquad_cascade_df2T_init_f32(
  S->pCoeffs = pCoeffs;

  /* Clear state buffer and size is always 2 * numStages */
-  memset(pState, 0, (2u * (uint32_t) numStages) * sizeof(float32_t));
+  memset(pState, 0, (2U * (uint32_t) numStages) * sizeof(float32_t));

  /* Assign state pointer */
  S->pState = pState;
--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f64.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_init_f64.c
@ -78,7 +78,7 @@ void arm_biquad_cascade_df2T_init_f64(
  S->pCoeffs = pCoeffs;

  /* Clear state buffer and size is always 2 * numStages */
-  memset(pState, 0, (2u * (uint32_t) numStages) * sizeof(float64_t));
+  memset(pState, 0, (2U * (uint32_t) numStages) * sizeof(float64_t));

  /* Assign state pointer */
  S->pState = pState;
--- a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f32.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f32.c
@ -171,7 +171,7 @@ uint32_t blockSize)
        b2 = pCoeffs[2];
        a1 = pCoeffs[3];
        /* Apply loop unrolling and compute 8 output values simultaneously. */
-        sample = blockSize >> 3u;
+        sample = blockSize >> 3U;
        a2 = pCoeffs[4];

        /*Reading the state values */
@ -180,11 +180,11 @@ uint32_t blockSize)
        d1b = pState[2];
        d2b = pState[3];

-        pCoeffs += 5u;
+        pCoeffs += 5U;

        /* First part of the processing with loop unrolling.  Compute 8 outputs at a time.
        ** a second loop below computes the remaining 1 to 7 samples. */
-        while (sample > 0u) {
+        while (sample > 0U) {

            /* y[n] = b0 * x[n] + d1 */
            /* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -367,8 +367,8 @@ uint32_t blockSize)
            d2b += a2 * acc8b;
        }

-        sample = blockSize & 0x7u;
-        while (sample > 0u) {
+        sample = blockSize & 0x7U;
+        while (sample > 0U) {
            /* Read the input */
            Xn1a = *pIn++; //Channel a
            Xn1b = *pIn++; //Channel b
@ -405,11 +405,11 @@ uint32_t blockSize)
        /* decrement the loop counter */
        stage--;

-        pState += 4u;
+        pState += 4U;
        /*Reset the output working pointer */
        pOut = pDst;

-    } while (stage > 0u);
+    } while (stage > 0U);

 #elif defined(ARM_MATH_CM0_FAMILY)

@ -433,7 +433,7 @@ uint32_t blockSize)

        sample = blockSize;

-        while (sample > 0u)
+        while (sample > 0U)
        {
            /* Read the input */
            Xn1a = *pIn++; //Channel a
@ -475,7 +475,7 @@ uint32_t blockSize)
        /* decrement the loop counter */
        stage--;

-    } while (stage > 0u);
+    } while (stage > 0U);

 #else

@ -503,11 +503,11 @@ uint32_t blockSize)
        d2b = pState[3];

        /* Apply loop unrolling and compute 4 output values simultaneously. */
-        sample = blockSize >> 2u;
+        sample = blockSize >> 2U;

        /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
        ** a second loop below computes the remaining 1 to 3 samples. */
-        while (sample > 0u) {
+        while (sample > 0U) {

            /* y[n] = b0 * x[n] + d1 */
            /* d1 = b1 * x[n] + a1 * y[n] + d2 */
@ -613,8 +613,8 @@ uint32_t blockSize)
            sample--;
        }

-        sample = blockSize & 0x3u;
-        while (sample > 0u) {
+        sample = blockSize & 0x3U;
+        while (sample > 0U) {
            Xn1a = *pIn++;
            Xn1b = *pIn++;

@ -658,7 +658,7 @@ uint32_t blockSize)
        /* decrement the loop counter */
        stage--;

-    } while (stage > 0u);
+    } while (stage > 0U);

 #endif

--- a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_init_f32.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_init_f32.c
@ -78,7 +78,7 @@ void arm_biquad_cascade_stereo_df2T_init_f32(
  S->pCoeffs = pCoeffs;

  /* Clear state buffer and size is always 4 * numStages */
-  memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(float32_t));
+  memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(float32_t));

  /* Assign state pointer */
  S->pState = pState;
--- a/Source/FilteringFunctions/arm_conv_f32.c
+++ b/Source/FilteringFunctions/arm_conv_f32.c
@ -168,8 +168,8 @@ void arm_conv_f32(

  /* The algorithm is implemented in three stages.
     The loop counters of each stage is initiated here. */
-  blockSize1 = srcBLen - 1u;
-  blockSize2 = srcALen - (srcBLen - 1u);
+  blockSize1 = srcBLen - 1U;
+  blockSize2 = srcALen - (srcBLen - 1U);
  blockSize3 = blockSize1;

  /* --------------------------
@ -184,7 +184,7 @@ void arm_conv_f32(

  /* In this stage the MAC operations are increased by 1 for every iteration.
     The count variable holds the number of MAC operations performed */
-  count = 1u;
+  count = 1U;

  /* Working pointer of inputA */
  px = pIn1;
@ -198,17 +198,17 @@ void arm_conv_f32(
   * ----------------------*/

  /* The first stage starts here */
-  while (blockSize1 > 0u)
+  while (blockSize1 > 0U)
  {
    /* Accumulator is made zero for every iteration */
    sum = 0.0f;

    /* Apply loop unrolling and compute 4 MACs simultaneously. */
-    k = count >> 2u;
+    k = count >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-    while (k > 0u)
+    while (k > 0U)
    {
      /* x[0] * y[srcBLen - 1] */
      sum += *px++ * *py--;
@ -228,9 +228,9 @@ void arm_conv_f32(

    /* If the count is not a multiple of 4, compute any remaining MACs here.
     ** No loop unrolling is used. */
-    k = count % 0x4u;
+    k = count % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulate */
      sum += *px++ * *py--;
@ -267,11 +267,11 @@ void arm_conv_f32(
  px = pIn1;

  /* Working pointer of inputB */
-  pSrc2 = pIn2 + (srcBLen - 1u);
+  pSrc2 = pIn2 + (srcBLen - 1U);
  py = pSrc2;

  /* count is index by which the pointer pIn1 to be incremented */
-  count = 0u;
+  count = 0U;

  /* -------------------
   * Stage2 process
@ -280,12 +280,12 @@ void arm_conv_f32(
  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
   * So, to loop unroll over blockSize2,
   * srcBLen should be greater than or equal to 4 */
-  if (srcBLen >= 4u)
+  if (srcBLen >= 4U)
  {
    /* Loop unroll over blockSize2, by 4 */
-    blkCnt = blockSize2 >> 2u;
+    blkCnt = blockSize2 >> 2U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Set all accumulators to zero */
      acc0 = 0.0f;
@ -299,7 +299,7 @@ void arm_conv_f32(
      x2 = *(px++);

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = srcBLen >> 2u;
+      k = srcBLen >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -328,7 +328,7 @@ void arm_conv_f32(
        c0 = *(py--);

        /* Read x[4] sample */
-        x0 = *(px + 1u);
+        x0 = *(px + 1U);

        /* Perform the multiply-accumulate */
        /* acc0 +=  x[1] * y[srcBLen - 2] */
@ -344,7 +344,7 @@ void arm_conv_f32(
        c0 = *(py--);

        /* Read x[5] sample */
-        x1 = *(px + 2u);
+        x1 = *(px + 2U);

        /* Perform the multiply-accumulates */
        /* acc0 +=  x[2] * y[srcBLen - 3] */
@ -360,8 +360,8 @@ void arm_conv_f32(
        c0 = *(py--);

        /* Read x[6] sample */
-        x2 = *(px + 3u);
-        px += 4u;
+        x2 = *(px + 3U);
+        px += 4U;

        /* Perform the multiply-accumulates */
        /* acc0 +=  x[3] * y[srcBLen - 4] */
@ -378,9 +378,9 @@ void arm_conv_f32(

      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = srcBLen % 0x4u;
+      k = srcBLen % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Read y[srcBLen - 5] sample */
        c0 = *(py--);
@ -414,7 +414,7 @@ void arm_conv_f32(
      *pOut++ = acc3;

      /* Increment the pointer pIn1 index, count by 4 */
-      count += 4u;
+      count += 4U;

      /* Update the inputA and inputB pointers for next MAC calculation */
      px = pIn1 + count;
@ -428,19 +428,19 @@ void arm_conv_f32(

    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    blkCnt = blockSize2 % 0x4u;
+    blkCnt = blockSize2 % 0x4U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Accumulator is made zero for every iteration */
      sum = 0.0f;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = srcBLen >> 2u;
+      k = srcBLen >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        sum += *px++ * *py--;
@ -454,9 +454,9 @@ void arm_conv_f32(

      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = srcBLen % 0x4u;
+      k = srcBLen % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulate */
        sum += *px++ * *py--;
@ -485,7 +485,7 @@ void arm_conv_f32(
     * the blockSize2 loop cannot be unrolled by 4 */
    blkCnt = blockSize2;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Accumulator is made zero for every iteration */
      sum = 0.0f;
@ -493,7 +493,7 @@ void arm_conv_f32(
      /* srcBLen number of MACS should be performed */
      k = srcBLen;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulate */
        sum += *px++ * *py--;
@ -533,28 +533,28 @@ void arm_conv_f32(
     The blockSize3 variable holds the number of MAC operations performed */

  /* Working pointer of inputA */
-  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
  px = pSrc1;

  /* Working pointer of inputB */
-  pSrc2 = pIn2 + (srcBLen - 1u);
+  pSrc2 = pIn2 + (srcBLen - 1U);
  py = pSrc2;

  /* -------------------
   * Stage3 process
   * ------------------*/

-  while (blockSize3 > 0u)
+  while (blockSize3 > 0U)
  {
    /* Accumulator is made zero for every iteration */
    sum = 0.0f;

    /* Apply loop unrolling and compute 4 MACs simultaneously. */
-    k = blockSize3 >> 2u;
+    k = blockSize3 >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-    while (k > 0u)
+    while (k > 0U)
    {
      /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */
      sum += *px++ * *py--;
@ -574,9 +574,9 @@ void arm_conv_f32(

    /* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
     ** No loop unrolling is used. */
-    k = blockSize3 % 0x4u;
+    k = blockSize3 % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
      /* sum +=  x[srcALen-1] * y[srcBLen-1] */
@ -607,13 +607,13 @@ void arm_conv_f32(
  uint32_t i, j;                                 /* loop counters */

  /* Loop to calculate convolution for output length number of times */
-  for (i = 0u; i < ((srcALen + srcBLen) - 1u); i++)
+  for (i = 0U; i < ((srcALen + srcBLen) - 1U); i++)
  {
    /* Initialize sum with zero to carry out MAC operations */
    sum = 0.0f;

    /* Loop to perform MAC operations according to convolution equation */
-    for (j = 0u; j <= i; j++)
+    for (j = 0U; j <= i; j++)
    {
      /* Check the array limitations */
      if ((((i - j) < srcBLen) && (j < srcALen)))
--- a/Source/FilteringFunctions/arm_conv_fast_opt_q15.c
+++ b/Source/FilteringFunctions/arm_conv_fast_opt_q15.c
@ -127,13 +127,13 @@ void arm_conv_fast_opt_q15(
  px = pIn2;

  /* Apply loop unrolling and do 4 Copies simultaneously. */
-  k = srcBLen >> 2u;
+  k = srcBLen >> 2U;

  /* First part of the processing with loop unrolling copies 4 data points at a time.
   ** a second loop below copies for the remaining 1 to 3 samples. */

  /* Copy smaller length input sequence in reverse order into second scratch buffer */
-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner */
    *pScr2-- = *px++;
@ -147,9 +147,9 @@ void arm_conv_fast_opt_q15(

  /* If the count is not a multiple of 4, copy remaining samples here.
   ** No loop unrolling is used. */
-  k = srcBLen % 0x4u;
+  k = srcBLen % 0x4U;

-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner for remaining samples */
    *pScr2-- = *px++;
@ -162,11 +162,11 @@ void arm_conv_fast_opt_q15(
  pScr1 = pScratch1;

  /* Assuming scratch1 buffer is aligned by 32-bit */
-  /* Fill (srcBLen - 1u) zeros in scratch1 buffer */
-  arm_fill_q15(0, pScr1, (srcBLen - 1u));
+  /* Fill (srcBLen - 1U) zeros in scratch1 buffer */
+  arm_fill_q15(0, pScr1, (srcBLen - 1U));

  /* Update temporary scratch pointer */
-  pScr1 += (srcBLen - 1u);
+  pScr1 += (srcBLen - 1U);

  /* Copy bigger length sequence(srcALen) samples in scratch1 buffer */

@ -181,11 +181,11 @@ void arm_conv_fast_opt_q15(
 #else

  /* Apply loop unrolling and do 4 Copies simultaneously. */
-  k = srcALen >> 2u;
+  k = srcALen >> 2U;

  /* First part of the processing with loop unrolling copies 4 data points at a time.
   ** a second loop below copies for the remaining 1 to 3 samples. */
-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner */
    *pScr1++ = *pIn1++;
@ -199,9 +199,9 @@ void arm_conv_fast_opt_q15(

  /* If the count is not a multiple of 4, copy remaining samples here.
   ** No loop unrolling is used. */
-  k = srcALen % 0x4u;
+  k = srcALen % 0x4U;

-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner for remaining samples */
    *pScr1++ = *pIn1++;
@ -215,20 +215,20 @@ void arm_conv_fast_opt_q15(

 #ifndef UNALIGNED_SUPPORT_DISABLE

-  /* Fill (srcBLen - 1u) zeros at end of scratch buffer */
-  arm_fill_q15(0, pScr1, (srcBLen - 1u));
+  /* Fill (srcBLen - 1U) zeros at end of scratch buffer */
+  arm_fill_q15(0, pScr1, (srcBLen - 1U));

  /* Update pointer */
-  pScr1 += (srcBLen - 1u);
+  pScr1 += (srcBLen - 1U);

 #else

  /* Apply loop unrolling and do 4 Copies simultaneously. */
-  k = (srcBLen - 1u) >> 2u;
+  k = (srcBLen - 1U) >> 2U;

  /* First part of the processing with loop unrolling copies 4 data points at a time.
   ** a second loop below copies for the remaining 1 to 3 samples. */
-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner */
    *pScr1++ = 0;
@ -242,9 +242,9 @@ void arm_conv_fast_opt_q15(

  /* If the count is not a multiple of 4, copy remaining samples here.
   ** No loop unrolling is used. */
-  k = (srcBLen - 1u) % 0x4u;
+  k = (srcBLen - 1U) % 0x4U;

-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner for remaining samples */
    *pScr1++ = 0;
@ -266,7 +266,7 @@ void arm_conv_fast_opt_q15(
   ** a second loop below process for the remaining 1 to 3 samples. */

  /* Actual convolution process starts here */
-  blkCnt = (srcALen + srcBLen - 1u) >> 2;
+  blkCnt = (srcALen + srcBLen - 1U) >> 2;

  while (blkCnt > 0)
  {
@ -285,16 +285,16 @@ void arm_conv_fast_opt_q15(
    /* Read next two samples from scratch1 buffer */
    x2 = *__SIMD32(pScr1)++;

-    tapCnt = (srcBLen) >> 2u;
+    tapCnt = (srcBLen) >> 2U;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

 #ifndef UNALIGNED_SUPPORT_DISABLE

      /* Read four samples from smaller buffer */
      y1 = _SIMD32_OFFSET(pIn2);
-      y2 = _SIMD32_OFFSET(pIn2 + 2u);
+      y2 = _SIMD32_OFFSET(pIn2 + 2U);

      /* multiply and accumlate */
      acc0 = __SMLAD(x1, y1, acc0);
@ -327,7 +327,7 @@ void arm_conv_fast_opt_q15(
      acc3 = __SMLADX(x3, y1, acc3);
      acc1 = __SMLADX(x3, y2, acc1);

-      x2 = _SIMD32_OFFSET(pScr1 + 2u);
+      x2 = _SIMD32_OFFSET(pScr1 + 2U);

 #ifndef ARM_MATH_BIG_ENDIAN
      x3 = __PKHBT(x2, x1, 0);
@ -412,8 +412,8 @@ void arm_conv_fast_opt_q15(
 #endif	/*	#ifndef UNALIGNED_SUPPORT_DISABLE	*/

      /* update scratch pointers */
-      pIn2 += 4u;
-      pScr1 += 4u;
+      pIn2 += 4U;
+      pScr1 += 4U;


      /* Decrement the loop counter */
@ -421,12 +421,12 @@ void arm_conv_fast_opt_q15(
    }

    /* Update scratch pointer for remaining samples of smaller length sequence */
-    pScr1 -= 4u;
+    pScr1 -= 4U;

    /* apply same above for remaining samples of smaller length sequence */
-    tapCnt = (srcBLen) & 3u;
+    tapCnt = (srcBLen) & 3U;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

      /* accumlate the results */
@ -435,7 +435,7 @@ void arm_conv_fast_opt_q15(
      acc2 += (*pScr1++ * *pIn2);
      acc3 += (*pScr1++ * *pIn2++);

-      pScr1 -= 3u;
+      pScr1 -= 3U;

      /* Decrement the loop counter */
      tapCnt--;
@ -470,12 +470,12 @@ void arm_conv_fast_opt_q15(
    /* Initialization of inputB pointer */
    pIn2 = py;

-    pScratch1 += 4u;
+    pScratch1 += 4U;

  }


-  blkCnt = (srcALen + srcBLen - 1u) & 0x3;
+  blkCnt = (srcALen + srcBLen - 1U) & 0x3;

  /* Calculate convolution for remaining samples of Bigger length sequence */
  while (blkCnt > 0)
@ -486,9 +486,9 @@ void arm_conv_fast_opt_q15(
    /* Clear Accumlators */
    acc0 = 0;

-    tapCnt = (srcBLen) >> 1u;
+    tapCnt = (srcBLen) >> 1U;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

      acc0 += (*pScr1++ * *pIn2++);
@ -498,10 +498,10 @@ void arm_conv_fast_opt_q15(
      tapCnt--;
    }

-    tapCnt = (srcBLen) & 1u;
+    tapCnt = (srcBLen) & 1U;

    /* apply same above for remaining samples of smaller length sequence */
-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

      /* accumlate the results */
@ -520,7 +520,7 @@ void arm_conv_fast_opt_q15(
    /* Initialization of inputB pointer */
    pIn2 = py;

-    pScratch1 += 1u;
+    pScratch1 += 1U;

  }

--- a/Source/FilteringFunctions/arm_conv_fast_q15.c
+++ b/Source/FilteringFunctions/arm_conv_fast_q15.c
@ -116,8 +116,8 @@ void arm_conv_fast_q15(

  /* The algorithm is implemented in three stages.
     The loop counters of each stage is initiated here. */
-  blockSize1 = srcBLen - 1u;
-  blockSize2 = srcALen - (srcBLen - 1u);
+  blockSize1 = srcBLen - 1U;
+  blockSize2 = srcALen - (srcBLen - 1U);
  blockSize3 = blockSize1;

  /* --------------------------
@ -132,7 +132,7 @@ void arm_conv_fast_q15(

  /* In this stage the MAC operations are increased by 1 for every iteration.
     The count variable holds the number of MAC operations performed */
-  count = 1u;
+  count = 1U;

  /* Working pointer of inputA */
  px = pIn1;
@ -150,7 +150,7 @@ void arm_conv_fast_q15(
  /* Second part of this stage computes the MAC operations greater than or equal to 4 */

  /* The first part of the stage starts here */
-  while ((count < 4u) && (blockSize1 > 0u))
+  while ((count < 4U) && (blockSize1 > 0U))
  {
    /* Accumulator is made zero for every iteration */
    sum = 0;
@ -159,7 +159,7 @@ void arm_conv_fast_q15(
     * inputA samples and inputB samples */
    k = count;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
      sum = __SMLAD(*px++, *py--, sum);
@ -188,17 +188,17 @@ void arm_conv_fast_q15(
   * y[srcBLen] and y[srcBLen-1] coefficients, py is decremented by 1 */
  py = py - 1;

-  while (blockSize1 > 0u)
+  while (blockSize1 > 0U)
  {
    /* Accumulator is made zero for every iteration */
    sum = 0;

    /* Apply loop unrolling and compute 4 MACs simultaneously. */
-    k = count >> 2u;
+    k = count >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
      /* x[0], x[1] are multiplied with y[srcBLen - 1], y[srcBLen - 2] respectively */
@ -212,13 +212,13 @@ void arm_conv_fast_q15(

    /* For the next MAC operations, the pointer py is used without SIMD
     * So, py is incremented by 1 */
-    py = py + 1u;
+    py = py + 1U;

    /* If the count is not a multiple of 4, compute any remaining MACs here.
     ** No loop unrolling is used. */
-    k = count % 0x4u;
+    k = count % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
      sum = __SMLAD(*px++, *py--, sum);
@ -231,7 +231,7 @@ void arm_conv_fast_q15(
    *pOut++ = (q15_t) (sum >> 15);

    /* Update the inputA and inputB pointers for next MAC calculation */
-    py = pIn2 + (count - 1u);
+    py = pIn2 + (count - 1U);
    px = pIn1;

    /* Increment the MAC count */
@ -255,11 +255,11 @@ void arm_conv_fast_q15(
  px = pIn1;

  /* Working pointer of inputB */
-  pSrc2 = pIn2 + (srcBLen - 1u);
+  pSrc2 = pIn2 + (srcBLen - 1U);
  py = pSrc2;

  /* count is the index by which the pointer pIn1 to be incremented */
-  count = 0u;
+  count = 0U;


  /* --------------------
@ -269,14 +269,14 @@ void arm_conv_fast_q15(
  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
   * So, to loop unroll over blockSize2,
   * srcBLen should be greater than or equal to 4 */
-  if (srcBLen >= 4u)
+  if (srcBLen >= 4U)
  {
    /* Loop unroll over blockSize2, by 4 */
-    blkCnt = blockSize2 >> 2u;
+    blkCnt = blockSize2 >> 2U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
-      py = py - 1u;
+      py = py - 1U;

      /* Set all accumulators to zero */
      acc0 = 0;
@ -289,11 +289,11 @@ void arm_conv_fast_q15(
      x0 = *__SIMD32(px);
      /* read x[1], x[2] samples */
      x1 = _SIMD32_OFFSET(px+1);
-	  px+= 2u;
+	  px+= 2U;


      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = srcBLen >> 2u;
+      k = srcBLen >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -335,7 +335,7 @@ void arm_conv_fast_q15(

        /* Read x[5], x[6] */
        x1 = _SIMD32_OFFSET(px+3);
-		px += 4u;
+		px += 4U;

        /* acc2 +=  x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
        acc2 = __SMLADX(x0, c0, acc2);
@ -350,16 +350,16 @@ void arm_conv_fast_q15(

      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = srcBLen % 0x4u;
+      k = srcBLen % 0x4U;

-      if (k == 1u)
+      if (k == 1U)
      {
        /* Read y[srcBLen - 5] */
        c0 = *(py+1);

 #ifdef  ARM_MATH_BIG_ENDIAN

-        c0 = c0 << 16u;
+        c0 = c0 << 16U;

 #else

@ -378,7 +378,7 @@ void arm_conv_fast_q15(
        acc3 = __SMLADX(x3, c0, acc3);
      }

-      if (k == 2u)
+      if (k == 2U)
      {
        /* Read y[srcBLen - 5], y[srcBLen - 6] */
        c0 = _SIMD32_OFFSET(py);
@ -388,7 +388,7 @@ void arm_conv_fast_q15(

        /* Read x[9] */
        x2 = _SIMD32_OFFSET(px+1);
-		px += 2u;
+		px += 2U;

        /* Perform the multiply-accumulates */
        acc0 = __SMLADX(x0, c0, acc0);
@ -397,7 +397,7 @@ void arm_conv_fast_q15(
        acc3 = __SMLADX(x2, c0, acc3);
      }

-      if (k == 3u)
+      if (k == 3U)
      {
        /* Read y[srcBLen - 5], y[srcBLen - 6] */
        c0 = _SIMD32_OFFSET(py);
@ -418,7 +418,7 @@ void arm_conv_fast_q15(
 		c0 = *(py-1);
 #ifdef  ARM_MATH_BIG_ENDIAN

-        c0 = c0 << 16u;
+        c0 = c0 << 16U;
 #else

        c0 = c0 & 0x0000FFFF;
@ -426,7 +426,7 @@ void arm_conv_fast_q15(

        /* Read x[10] */
        x3 =  _SIMD32_OFFSET(px+2);
-		px += 3u;
+		px += 3U;

        /* Perform the multiply-accumulates */
        acc0 = __SMLADX(x1, c0, acc0);
@ -449,7 +449,7 @@ void arm_conv_fast_q15(
 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */

      /* Increment the pointer pIn1 index, count by 4 */
-      count += 4u;
+      count += 4U;

      /* Update the inputA and inputB pointers for next MAC calculation */
      px = pIn1 + count;
@ -461,19 +461,19 @@ void arm_conv_fast_q15(

    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    blkCnt = blockSize2 % 0x4u;
+    blkCnt = blockSize2 % 0x4U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Accumulator is made zero for every iteration */
      sum = 0;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = srcBLen >> 2u;
+      k = srcBLen >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        sum += ((q31_t) * px++ * *py--);
@ -487,9 +487,9 @@ void arm_conv_fast_q15(

      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = srcBLen % 0x4u;
+      k = srcBLen % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        sum += ((q31_t) * px++ * *py--);
@ -518,7 +518,7 @@ void arm_conv_fast_q15(
     * the blockSize2 loop cannot be unrolled by 4 */
    blkCnt = blockSize2;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Accumulator is made zero for every iteration */
      sum = 0;
@ -526,7 +526,7 @@ void arm_conv_fast_q15(
      /* srcBLen number of MACS should be performed */
      k = srcBLen;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulate */
        sum += ((q31_t) * px++ * *py--);
@ -566,12 +566,12 @@ void arm_conv_fast_q15(
     The blockSize3 variable holds the number of MAC operations performed */

  /* Working pointer of inputA */
-  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
  px = pSrc1;

  /* Working pointer of inputB */
-  pSrc2 = pIn2 + (srcBLen - 1u);
-  pIn2 = pSrc2 - 1u;
+  pSrc2 = pIn2 + (srcBLen - 1U);
+  pIn2 = pSrc2 - 1U;
  py = pIn2;

  /* -------------------
@ -583,19 +583,19 @@ void arm_conv_fast_q15(
  /* Second part of this stage computes the MAC operations less than or equal to 4 */

  /* The first part of the stage starts here */
-  j = blockSize3 >> 2u;
+  j = blockSize3 >> 2U;

-  while ((j > 0u) && (blockSize3 > 0u))
+  while ((j > 0U) && (blockSize3 > 0U))
  {
    /* Accumulator is made zero for every iteration */
    sum = 0;

    /* Apply loop unrolling and compute 4 MACs simultaneously. */
-    k = blockSize3 >> 2u;
+    k = blockSize3 >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-    while (k > 0u)
+    while (k > 0U)
    {
      /* x[srcALen - srcBLen + 1], x[srcALen - srcBLen + 2] are multiplied
       * with y[srcBLen - 1], y[srcBLen - 2] respectively */
@ -610,13 +610,13 @@ void arm_conv_fast_q15(

    /* For the next MAC operations, the pointer py is used without SIMD
     * So, py is incremented by 1 */
-    py = py + 1u;
+    py = py + 1U;

    /* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
     ** No loop unrolling is used. */
-    k = blockSize3 % 0x4u;
+    k = blockSize3 % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* sum += x[srcALen - srcBLen + 5] * y[srcBLen - 5] */
      sum = __SMLAD(*px++, *py--, sum);
@ -641,9 +641,9 @@ void arm_conv_fast_q15(
  /* The second part of the stage starts here */
  /* SIMD is not used for the next MAC operations,
   * so pointer py is updated to read only one sample at a time */
-  py = py + 1u;
+  py = py + 1U;

-  while (blockSize3 > 0u)
+  while (blockSize3 > 0U)
  {
    /* Accumulator is made zero for every iteration */
    sum = 0;
@ -651,7 +651,7 @@ void arm_conv_fast_q15(
    /* Apply loop unrolling and compute 4 MACs simultaneously. */
    k = blockSize3;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
      /* sum +=  x[srcALen-1] * y[srcBLen-1] */
@ -720,8 +720,8 @@ void arm_conv_fast_q15(

  /* The algorithm is implemented in three stages.
     The loop counters of each stage is initiated here. */
-  blockSize1 = srcBLen - 1u;
-  blockSize2 = srcALen - (srcBLen - 1u);
+  blockSize1 = srcBLen - 1U;
+  blockSize2 = srcALen - (srcBLen - 1U);
  blockSize3 = blockSize1;

  /* --------------------------
@ -736,7 +736,7 @@ void arm_conv_fast_q15(

  /* In this stage the MAC operations are increased by 1 for every iteration.
     The count variable holds the number of MAC operations performed */
-  count = 1u;
+  count = 1U;

  /* Working pointer of inputA */
  px = pIn1;
@ -754,7 +754,7 @@ void arm_conv_fast_q15(
  /* Second part of this stage computes the MAC operations greater than or equal to 4 */

  /* The first part of the stage starts here */
-  while ((count < 4u) && (blockSize1 > 0u))
+  while ((count < 4U) && (blockSize1 > 0U))
  {
    /* Accumulator is made zero for every iteration */
    sum = 0;
@ -763,7 +763,7 @@ void arm_conv_fast_q15(
     * inputA samples and inputB samples */
    k = count;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
      sum += ((q31_t) * px++ * *py--);
@ -792,19 +792,19 @@ void arm_conv_fast_q15(
   * y[srcBLen] and y[srcBLen-1] coefficients, py is decremented by 1 */
  py = py - 1;

-  while (blockSize1 > 0u)
+  while (blockSize1 > 0U)
  {
    /* Accumulator is made zero for every iteration */
    sum = 0;

    /* Apply loop unrolling and compute 4 MACs simultaneously. */
-    k = count >> 2u;
+    k = count >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
 	py++;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
        sum += ((q31_t) * px++ * *py--);
@ -818,9 +818,9 @@ void arm_conv_fast_q15(

    /* If the count is not a multiple of 4, compute any remaining MACs here.
     ** No loop unrolling is used. */
-    k = count % 0x4u;
+    k = count % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
      sum += ((q31_t) * px++ * *py--);
@ -833,7 +833,7 @@ void arm_conv_fast_q15(
    *pOut++ = (q15_t) (sum >> 15);

    /* Update the inputA and inputB pointers for next MAC calculation */
-    py = pIn2 + (count - 1u);
+    py = pIn2 + (count - 1U);
    px = pIn1;

    /* Increment the MAC count */
@ -857,11 +857,11 @@ void arm_conv_fast_q15(
  px = pIn1;

  /* Working pointer of inputB */
-  pSrc2 = pIn2 + (srcBLen - 1u);
+  pSrc2 = pIn2 + (srcBLen - 1U);
  py = pSrc2;

  /* count is the index by which the pointer pIn1 to be incremented */
-  count = 0u;
+  count = 0U;


  /* --------------------
@ -871,14 +871,14 @@ void arm_conv_fast_q15(
  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
   * So, to loop unroll over blockSize2,
   * srcBLen should be greater than or equal to 4 */
-  if (srcBLen >= 4u)
+  if (srcBLen >= 4U)
  {
    /* Loop unroll over blockSize2, by 4 */
-    blkCnt = blockSize2 >> 2u;
+    blkCnt = blockSize2 >> 2U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
-      py = py - 1u;
+      py = py - 1U;

      /* Set all accumulators to zero */
      acc0 = 0;
@ -905,7 +905,7 @@ void arm_conv_fast_q15(
 #endif	/*	#ifndef ARM_MATH_BIG_ENDIAN	   */

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = srcBLen >> 2u;
+      k = srcBLen >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -995,7 +995,7 @@ void arm_conv_fast_q15(

 #endif	/*	#ifndef ARM_MATH_BIG_ENDIAN	   */

-		px += 4u;
+		px += 4U;

        /* acc2 +=  x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
        acc2 = __SMLADX(x0, c0, acc2);
@ -1010,16 +1010,16 @@ void arm_conv_fast_q15(

      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = srcBLen % 0x4u;
+      k = srcBLen % 0x4U;

-      if (k == 1u)
+      if (k == 1U)
      {
        /* Read y[srcBLen - 5] */
        c0 = *(py+1);

 #ifdef  ARM_MATH_BIG_ENDIAN

-        c0 = c0 << 16u;
+        c0 = c0 << 16U;

 #else

@ -1050,7 +1050,7 @@ void arm_conv_fast_q15(
        acc3 = __SMLADX(x3, c0, acc3);
      }

-      if (k == 2u)
+      if (k == 2U)
      {
        /* Read y[srcBLen - 5], y[srcBLen - 6] */
 		a = *py;
@ -1083,7 +1083,7 @@ void arm_conv_fast_q15(
 	  x2 = __PKHBT(a, b, 16);

 #endif	/*	#ifndef ARM_MATH_BIG_ENDIAN	   */
-		px += 2u;
+		px += 2U;

        /* Perform the multiply-accumulates */
        acc0 = __SMLADX(x0, c0, acc0);
@ -1092,7 +1092,7 @@ void arm_conv_fast_q15(
        acc3 = __SMLADX(x2, c0, acc3);
      }

-      if (k == 3u)
+      if (k == 3U)
      {
        /* Read y[srcBLen - 5], y[srcBLen - 6] */
 		a = *py;
@ -1136,7 +1136,7 @@ void arm_conv_fast_q15(
 		c0 = *(py-1);
 #ifdef  ARM_MATH_BIG_ENDIAN

-        c0 = c0 << 16u;
+        c0 = c0 << 16U;
 #else

        c0 = c0 & 0x0000FFFF;
@ -1156,7 +1156,7 @@ void arm_conv_fast_q15(

 #endif	/*	#ifndef ARM_MATH_BIG_ENDIAN	*/

-		px += 3u;
+		px += 3U;

        /* Perform the multiply-accumulates */
        acc0 = __SMLADX(x1, c0, acc0);
@ -1172,7 +1172,7 @@ void arm_conv_fast_q15(
 	  *pOut++ = (q15_t)(acc3 >> 15);

      /* Increment the pointer pIn1 index, count by 4 */
-      count += 4u;
+      count += 4U;

      /* Update the inputA and inputB pointers for next MAC calculation */
      px = pIn1 + count;
@ -1184,19 +1184,19 @@ void arm_conv_fast_q15(

    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    blkCnt = blockSize2 % 0x4u;
+    blkCnt = blockSize2 % 0x4U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Accumulator is made zero for every iteration */
      sum = 0;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = srcBLen >> 2u;
+      k = srcBLen >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        sum += ((q31_t) * px++ * *py--);
@ -1210,9 +1210,9 @@ void arm_conv_fast_q15(

      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = srcBLen % 0x4u;
+      k = srcBLen % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        sum += ((q31_t) * px++ * *py--);
@ -1241,7 +1241,7 @@ void arm_conv_fast_q15(
     * the blockSize2 loop cannot be unrolled by 4 */
    blkCnt = blockSize2;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Accumulator is made zero for every iteration */
      sum = 0;
@ -1249,7 +1249,7 @@ void arm_conv_fast_q15(
      /* srcBLen number of MACS should be performed */
      k = srcBLen;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulate */
        sum += ((q31_t) * px++ * *py--);
@ -1289,12 +1289,12 @@ void arm_conv_fast_q15(
     The blockSize3 variable holds the number of MAC operations performed */

  /* Working pointer of inputA */
-  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
  px = pSrc1;

  /* Working pointer of inputB */
-  pSrc2 = pIn2 + (srcBLen - 1u);
-  pIn2 = pSrc2 - 1u;
+  pSrc2 = pIn2 + (srcBLen - 1U);
+  pIn2 = pSrc2 - 1U;
  py = pIn2;

  /* -------------------
@ -1306,21 +1306,21 @@ void arm_conv_fast_q15(
  /* Second part of this stage computes the MAC operations less than or equal to 4 */

  /* The first part of the stage starts here */
-  j = blockSize3 >> 2u;
+  j = blockSize3 >> 2U;

-  while ((j > 0u) && (blockSize3 > 0u))
+  while ((j > 0U) && (blockSize3 > 0U))
  {
    /* Accumulator is made zero for every iteration */
    sum = 0;

    /* Apply loop unrolling and compute 4 MACs simultaneously. */
-    k = blockSize3 >> 2u;
+    k = blockSize3 >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
 	py++;

-    while (k > 0u)
+    while (k > 0U)
    {
        sum += ((q31_t) * px++ * *py--);
        sum += ((q31_t) * px++ * *py--);
@ -1332,9 +1332,9 @@ void arm_conv_fast_q15(

    /* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
     ** No loop unrolling is used. */
-    k = blockSize3 % 0x4u;
+    k = blockSize3 % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* sum += x[srcALen - srcBLen + 5] * y[srcBLen - 5] */
        sum += ((q31_t) * px++ * *py--);
@ -1359,9 +1359,9 @@ void arm_conv_fast_q15(
  /* The second part of the stage starts here */
  /* SIMD is not used for the next MAC operations,
   * so pointer py is updated to read only one sample at a time */
-  py = py + 1u;
+  py = py + 1U;

-  while (blockSize3 > 0u)
+  while (blockSize3 > 0U)
  {
    /* Accumulator is made zero for every iteration */
    sum = 0;
@ -1369,7 +1369,7 @@ void arm_conv_fast_q15(
    /* Apply loop unrolling and compute 4 MACs simultaneously. */
    k = blockSize3;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
      /* sum +=  x[srcALen-1] * y[srcBLen-1] */
--- a/Source/FilteringFunctions/arm_conv_fast_q31.c
+++ b/Source/FilteringFunctions/arm_conv_fast_q31.c
@ -117,8 +117,8 @@ void arm_conv_fast_q31(

  /* The algorithm is implemented in three stages.
     The loop counters of each stage is initiated here. */
-  blockSize1 = srcBLen - 1u;
-  blockSize2 = srcALen - (srcBLen - 1u);
+  blockSize1 = srcBLen - 1U;
+  blockSize2 = srcALen - (srcBLen - 1U);
  blockSize3 = blockSize1;

  /* --------------------------
@ -133,7 +133,7 @@ void arm_conv_fast_q31(

  /* In this stage the MAC operations are increased by 1 for every iteration.
     The count variable holds the number of MAC operations performed */
-  count = 1u;
+  count = 1U;

  /* Working pointer of inputA */
  px = pIn1;
@ -147,17 +147,17 @@ void arm_conv_fast_q31(
   * ----------------------*/

  /* The first stage starts here */
-  while (blockSize1 > 0u)
+  while (blockSize1 > 0U)
  {
    /* Accumulator is made zero for every iteration */
    sum = 0;

    /* Apply loop unrolling and compute 4 MACs simultaneously. */
-    k = count >> 2u;
+    k = count >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-    while (k > 0u)
+    while (k > 0U)
    {
      /* x[0] * y[srcBLen - 1] */
      sum = (q31_t) ((((q63_t) sum << 32) +
@ -181,9 +181,9 @@ void arm_conv_fast_q31(

    /* If the count is not a multiple of 4, compute any remaining MACs here.
     ** No loop unrolling is used. */
-    k = count % 0x4u;
+    k = count % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulate */
      sum = (q31_t) ((((q63_t) sum << 32) +
@ -221,11 +221,11 @@ void arm_conv_fast_q31(
  px = pIn1;

  /* Working pointer of inputB */
-  pSrc2 = pIn2 + (srcBLen - 1u);
+  pSrc2 = pIn2 + (srcBLen - 1U);
  py = pSrc2;

  /* count is index by which the pointer pIn1 to be incremented */
-  count = 0u;
+  count = 0U;

  /* -------------------
   * Stage2 process
@ -234,12 +234,12 @@ void arm_conv_fast_q31(
  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
   * So, to loop unroll over blockSize2,
   * srcBLen should be greater than or equal to 4 */
-  if (srcBLen >= 4u)
+  if (srcBLen >= 4U)
  {
    /* Loop unroll over blockSize2, by 4 */
-    blkCnt = blockSize2 >> 2u;
+    blkCnt = blockSize2 >> 2U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Set all accumulators to zero */
      acc0 = 0;
@ -253,7 +253,7 @@ void arm_conv_fast_q31(
      x2 = *(px++);

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = srcBLen >> 2u;
+      k = srcBLen >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -331,9 +331,9 @@ void arm_conv_fast_q31(

      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = srcBLen % 0x4u;
+      k = srcBLen % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Read y[srcBLen - 5] sample */
        c0 = *(py--);
@ -367,7 +367,7 @@ void arm_conv_fast_q31(
      *pOut++ = (q31_t) (acc3 << 1);

      /* Increment the pointer pIn1 index, count by 4 */
-      count += 4u;
+      count += 4U;

      /* Update the inputA and inputB pointers for next MAC calculation */
      px = pIn1 + count;
@ -379,19 +379,19 @@ void arm_conv_fast_q31(

    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
     ** No loop unrolling is used. */
-    blkCnt = blockSize2 % 0x4u;
+    blkCnt = blockSize2 % 0x4U;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Accumulator is made zero for every iteration */
      sum = 0;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = srcBLen >> 2u;
+      k = srcBLen >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        sum = (q31_t) ((((q63_t) sum << 32) +
@ -409,9 +409,9 @@ void arm_conv_fast_q31(

      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = srcBLen % 0x4u;
+      k = srcBLen % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulate */
        sum = (q31_t) ((((q63_t) sum << 32) +
@ -441,7 +441,7 @@ void arm_conv_fast_q31(
     * the blockSize2 loop cannot be unrolled by 4 */
    blkCnt = blockSize2;

-    while (blkCnt > 0u)
+    while (blkCnt > 0U)
    {
      /* Accumulator is made zero for every iteration */
      sum = 0;
@ -449,7 +449,7 @@ void arm_conv_fast_q31(
      /* srcBLen number of MACS should be performed */
      k = srcBLen;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulate */
        sum = (q31_t) ((((q63_t) sum << 32) +
@ -490,28 +490,28 @@ void arm_conv_fast_q31(
     The blockSize3 variable holds the number of MAC operations performed */

  /* Working pointer of inputA */
-  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
  px = pSrc1;

  /* Working pointer of inputB */
-  pSrc2 = pIn2 + (srcBLen - 1u);
+  pSrc2 = pIn2 + (srcBLen - 1U);
  py = pSrc2;

  /* -------------------
   * Stage3 process
   * ------------------*/

-  while (blockSize3 > 0u)
+  while (blockSize3 > 0U)
  {
    /* Accumulator is made zero for every iteration */
    sum = 0;

    /* Apply loop unrolling and compute 4 MACs simultaneously. */
-    k = blockSize3 >> 2u;
+    k = blockSize3 >> 2U;

    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-    while (k > 0u)
+    while (k > 0U)
    {
      /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */
      sum = (q31_t) ((((q63_t) sum << 32) +
@ -535,9 +535,9 @@ void arm_conv_fast_q31(

    /* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
     ** No loop unrolling is used. */
-    k = blockSize3 % 0x4u;
+    k = blockSize3 % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulate */
      sum = (q31_t) ((((q63_t) sum << 32) +
--- a/Source/FilteringFunctions/arm_conv_opt_q15.c
+++ b/Source/FilteringFunctions/arm_conv_opt_q15.c
@ -130,12 +130,12 @@ void arm_conv_opt_q15(
  px = pIn2;

  /* Apply loop unrolling and do 4 Copies simultaneously. */
-  k = srcBLen >> 2u;
+  k = srcBLen >> 2U;

  /* First part of the processing with loop unrolling copies 4 data points at a time.
   ** a second loop below copies for the remaining 1 to 3 samples. */
  /* Copy smaller length input sequence in reverse order into second scratch buffer */
-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner */
    *pScr2-- = *px++;
@ -149,9 +149,9 @@ void arm_conv_opt_q15(

  /* If the count is not a multiple of 4, copy remaining samples here.
   ** No loop unrolling is used. */
-  k = srcBLen % 0x4u;
+  k = srcBLen % 0x4U;

-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner for remaining samples */
    *pScr2-- = *px++;
@ -164,11 +164,11 @@ void arm_conv_opt_q15(
  pScr1 = pScratch1;

  /* Assuming scratch1 buffer is aligned by 32-bit */
-  /* Fill (srcBLen - 1u) zeros in scratch buffer */
-  arm_fill_q15(0, pScr1, (srcBLen - 1u));
+  /* Fill (srcBLen - 1U) zeros in scratch buffer */
+  arm_fill_q15(0, pScr1, (srcBLen - 1U));

  /* Update temporary scratch pointer */
-  pScr1 += (srcBLen - 1u);
+  pScr1 += (srcBLen - 1U);

  /* Copy bigger length sequence(srcALen) samples in scratch1 buffer */

@ -183,11 +183,11 @@ void arm_conv_opt_q15(
 #else

  /* Apply loop unrolling and do 4 Copies simultaneously. */
-  k = srcALen >> 2u;
+  k = srcALen >> 2U;

  /* First part of the processing with loop unrolling copies 4 data points at a time.
   ** a second loop below copies for the remaining 1 to 3 samples. */
-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner */
    *pScr1++ = *pIn1++;
@ -201,9 +201,9 @@ void arm_conv_opt_q15(

  /* If the count is not a multiple of 4, copy remaining samples here.
   ** No loop unrolling is used. */
-  k = srcALen % 0x4u;
+  k = srcALen % 0x4U;

-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner for remaining samples */
    *pScr1++ = *pIn1++;
@ -217,20 +217,20 @@ void arm_conv_opt_q15(

 #ifndef UNALIGNED_SUPPORT_DISABLE

-  /* Fill (srcBLen - 1u) zeros at end of scratch buffer */
-  arm_fill_q15(0, pScr1, (srcBLen - 1u));
+  /* Fill (srcBLen - 1U) zeros at end of scratch buffer */
+  arm_fill_q15(0, pScr1, (srcBLen - 1U));

  /* Update pointer */
-  pScr1 += (srcBLen - 1u);
+  pScr1 += (srcBLen - 1U);

 #else

  /* Apply loop unrolling and do 4 Copies simultaneously. */
-  k = (srcBLen - 1u) >> 2u;
+  k = (srcBLen - 1U) >> 2U;

  /* First part of the processing with loop unrolling copies 4 data points at a time.
   ** a second loop below copies for the remaining 1 to 3 samples. */
-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner */
    *pScr1++ = 0;
@ -244,9 +244,9 @@ void arm_conv_opt_q15(

  /* If the count is not a multiple of 4, copy remaining samples here.
   ** No loop unrolling is used. */
-  k = (srcBLen - 1u) % 0x4u;
+  k = (srcBLen - 1U) % 0x4U;

-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner for remaining samples */
    *pScr1++ = 0;
@ -268,7 +268,7 @@ void arm_conv_opt_q15(
   ** a second loop below process for the remaining 1 to 3 samples. */

  /* Actual convolution process starts here */
-  blkCnt = (srcALen + srcBLen - 1u) >> 2;
+  blkCnt = (srcALen + srcBLen - 1U) >> 2;

  while (blkCnt > 0)
  {
@ -287,16 +287,16 @@ void arm_conv_opt_q15(
    /* Read next two samples from scratch1 buffer */
    x2 = *__SIMD32(pScr1)++;

-    tapCnt = (srcBLen) >> 2u;
+    tapCnt = (srcBLen) >> 2U;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

 #ifndef UNALIGNED_SUPPORT_DISABLE

      /* Read four samples from smaller buffer */
      y1 = _SIMD32_OFFSET(pIn2);
-      y2 = _SIMD32_OFFSET(pIn2 + 2u);
+      y2 = _SIMD32_OFFSET(pIn2 + 2U);

      /* multiply and accumlate */
      acc0 = __SMLALD(x1, y1, acc0);
@ -329,7 +329,7 @@ void arm_conv_opt_q15(
      acc3 = __SMLALDX(x3, y1, acc3);
      acc1 = __SMLALDX(x3, y2, acc1);

-      x2 = _SIMD32_OFFSET(pScr1 + 2u);
+      x2 = _SIMD32_OFFSET(pScr1 + 2U);

 #ifndef ARM_MATH_BIG_ENDIAN
      x3 = __PKHBT(x2, x1, 0);
@ -413,8 +413,8 @@ void arm_conv_opt_q15(

 #endif	/*	#ifndef UNALIGNED_SUPPORT_DISABLE	*/

-      pIn2 += 4u;
-      pScr1 += 4u;
+      pIn2 += 4U;
+      pScr1 += 4U;


      /* Decrement the loop counter */
@ -422,12 +422,12 @@ void arm_conv_opt_q15(
    }

    /* Update scratch pointer for remaining samples of smaller length sequence */
-    pScr1 -= 4u;
+    pScr1 -= 4U;

    /* apply same above for remaining samples of smaller length sequence */
-    tapCnt = (srcBLen) & 3u;
+    tapCnt = (srcBLen) & 3U;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

      /* accumlate the results */
@ -436,7 +436,7 @@ void arm_conv_opt_q15(
      acc2 += (*pScr1++ * *pIn2);
      acc3 += (*pScr1++ * *pIn2++);

-      pScr1 -= 3u;
+      pScr1 -= 3U;

      /* Decrement the loop counter */
      tapCnt--;
@ -469,12 +469,12 @@ void arm_conv_opt_q15(
    /* Initialization of inputB pointer */
    pIn2 = py;

-    pScratch1 += 4u;
+    pScratch1 += 4U;

  }


-  blkCnt = (srcALen + srcBLen - 1u) & 0x3;
+  blkCnt = (srcALen + srcBLen - 1U) & 0x3;

  /* Calculate convolution for remaining samples of Bigger length sequence */
  while (blkCnt > 0)
@ -485,9 +485,9 @@ void arm_conv_opt_q15(
    /* Clear Accumlators */
    acc0 = 0;

-    tapCnt = (srcBLen) >> 1u;
+    tapCnt = (srcBLen) >> 1U;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

      /* Read next two samples from scratch1 buffer */
@ -498,10 +498,10 @@ void arm_conv_opt_q15(
      tapCnt--;
    }

-    tapCnt = (srcBLen) & 1u;
+    tapCnt = (srcBLen) & 1U;

    /* apply same above for remaining samples of smaller length sequence */
-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

      /* accumlate the results */
@ -521,7 +521,7 @@ void arm_conv_opt_q15(
    /* Initialization of inputB pointer */
    pIn2 = py;

-    pScratch1 += 1u;
+    pScratch1 += 1U;

  }

--- a/Source/FilteringFunctions/arm_conv_opt_q7.c
+++ b/Source/FilteringFunctions/arm_conv_opt_q7.c
@ -117,11 +117,11 @@ void arm_conv_opt_q7(
  px = pIn2 + srcBLen - 1;

  /* Apply loop unrolling and do 4 Copies simultaneously. */
-  k = srcBLen >> 2u;
+  k = srcBLen >> 2U;

  /* First part of the processing with loop unrolling copies 4 data points at a time.
   ** a second loop below copies for the remaining 1 to 3 samples. */
-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner */
    x4 = (q15_t) * px--;
@ -139,9 +139,9 @@ void arm_conv_opt_q7(

  /* If the count is not a multiple of 4, copy remaining samples here.
   ** No loop unrolling is used. */
-  k = srcBLen % 0x4u;
+  k = srcBLen % 0x4U;

-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner for remaining samples */
    x4 = (q15_t) * px--;
@ -154,19 +154,19 @@ void arm_conv_opt_q7(
  /* Initialze temporary scratch pointer */
  pScr1 = pScratch1;

-  /* Fill (srcBLen - 1u) zeros in scratch buffer */
-  arm_fill_q15(0, pScr1, (srcBLen - 1u));
+  /* Fill (srcBLen - 1U) zeros in scratch buffer */
+  arm_fill_q15(0, pScr1, (srcBLen - 1U));

  /* Update temporary scratch pointer */
-  pScr1 += (srcBLen - 1u);
+  pScr1 += (srcBLen - 1U);

  /* Copy (srcALen) samples in scratch buffer */
  /* Apply loop unrolling and do 4 Copies simultaneously. */
-  k = srcALen >> 2u;
+  k = srcALen >> 2U;

  /* First part of the processing with loop unrolling copies 4 data points at a time.
   ** a second loop below copies for the remaining 1 to 3 samples. */
-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner */
    x4 = (q15_t) * pIn1++;
@ -184,9 +184,9 @@ void arm_conv_opt_q7(

  /* If the count is not a multiple of 4, copy remaining samples here.
   ** No loop unrolling is used. */
-  k = srcALen % 0x4u;
+  k = srcALen % 0x4U;

-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner for remaining samples */
    x4 = (q15_t) * pIn1++;
@ -198,20 +198,20 @@ void arm_conv_opt_q7(

 #ifndef UNALIGNED_SUPPORT_DISABLE

-  /* Fill (srcBLen - 1u) zeros at end of scratch buffer */
-  arm_fill_q15(0, pScr1, (srcBLen - 1u));
+  /* Fill (srcBLen - 1U) zeros at end of scratch buffer */
+  arm_fill_q15(0, pScr1, (srcBLen - 1U));

  /* Update pointer */
-  pScr1 += (srcBLen - 1u);
+  pScr1 += (srcBLen - 1U);

 #else

  /* Apply loop unrolling and do 4 Copies simultaneously. */
-  k = (srcBLen - 1u) >> 2u;
+  k = (srcBLen - 1U) >> 2U;

  /* First part of the processing with loop unrolling copies 4 data points at a time.
   ** a second loop below copies for the remaining 1 to 3 samples. */
-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner */
    *pScr1++ = 0;
@ -225,9 +225,9 @@ void arm_conv_opt_q7(

  /* If the count is not a multiple of 4, copy remaining samples here.
   ** No loop unrolling is used. */
-  k = (srcBLen - 1u) % 0x4u;
+  k = (srcBLen - 1U) % 0x4U;

-  while (k > 0u)
+  while (k > 0U)
  {
    /* copy second buffer in reversal manner for remaining samples */
    *pScr1++ = 0;
@ -247,7 +247,7 @@ void arm_conv_opt_q7(
  pScr2 = py;

  /* Actual convolution process starts here */
-  blkCnt = (srcALen + srcBLen - 1u) >> 2;
+  blkCnt = (srcALen + srcBLen - 1U) >> 2;

  while (blkCnt > 0)
  {
@ -266,9 +266,9 @@ void arm_conv_opt_q7(
    /* Read next two samples from scratch1 buffer */
    x2 = *__SIMD32(pScr1)++;

-    tapCnt = (srcBLen) >> 2u;
+    tapCnt = (srcBLen) >> 2U;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

      /* Read four samples from smaller buffer */
@ -301,7 +301,7 @@ void arm_conv_opt_q7(
      acc3 = __SMLADX(x3, y1, acc3);

      /* Read four samples from smaller buffer */
-      y1 = _SIMD32_OFFSET(pScr2 + 2u);
+      y1 = _SIMD32_OFFSET(pScr2 + 2U);

      acc0 = __SMLAD(x2, y1, acc0);

@ -319,7 +319,7 @@ void arm_conv_opt_q7(

      acc3 = __SMLADX(x3, y1, acc3);

-      pScr2 += 4u;
+      pScr2 += 4U;


      /* Decrement the loop counter */
@ -329,13 +329,13 @@ void arm_conv_opt_q7(


    /* Update scratch pointer for remaining samples of smaller length sequence */
-    pScr1 -= 4u;
+    pScr1 -= 4U;


    /* apply same above for remaining samples of smaller length sequence */
-    tapCnt = (srcBLen) & 3u;
+    tapCnt = (srcBLen) & 3U;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

      /* accumlate the results */
@ -344,7 +344,7 @@ void arm_conv_opt_q7(
      acc2 += (*pScr1++ * *pScr2);
      acc3 += (*pScr1++ * *pScr2++);

-      pScr1 -= 3u;
+      pScr1 -= 3U;

      /* Decrement the loop counter */
      tapCnt--;
@ -353,22 +353,22 @@ void arm_conv_opt_q7(
    blkCnt--;

    /* Store the result in the accumulator in the destination buffer. */
-    out0 = (q7_t) (__SSAT(acc0 >> 7u, 8));
-    out1 = (q7_t) (__SSAT(acc1 >> 7u, 8));
-    out2 = (q7_t) (__SSAT(acc2 >> 7u, 8));
-    out3 = (q7_t) (__SSAT(acc3 >> 7u, 8));
+    out0 = (q7_t) (__SSAT(acc0 >> 7U, 8));
+    out1 = (q7_t) (__SSAT(acc1 >> 7U, 8));
+    out2 = (q7_t) (__SSAT(acc2 >> 7U, 8));
+    out3 = (q7_t) (__SSAT(acc3 >> 7U, 8));

    *__SIMD32(pOut)++ = __PACKq7(out0, out1, out2, out3);

    /* Initialization of inputB pointer */
    pScr2 = py;

-    pScratch1 += 4u;
+    pScratch1 += 4U;

  }


-  blkCnt = (srcALen + srcBLen - 1u) & 0x3;
+  blkCnt = (srcALen + srcBLen - 1U) & 0x3;

  /* Calculate convolution for remaining samples of Bigger length sequence */
  while (blkCnt > 0)
@ -379,9 +379,9 @@ void arm_conv_opt_q7(
    /* Clear Accumlators */
    acc0 = 0;

-    tapCnt = (srcBLen) >> 1u;
+    tapCnt = (srcBLen) >> 1U;

-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {
      acc0 += (*pScr1++ * *pScr2++);
      acc0 += (*pScr1++ * *pScr2++);
@ -390,10 +390,10 @@ void arm_conv_opt_q7(
      tapCnt--;
    }

-    tapCnt = (srcBLen) & 1u;
+    tapCnt = (srcBLen) & 1U;

    /* apply same above for remaining samples of smaller length sequence */
-    while (tapCnt > 0u)
+    while (tapCnt > 0U)
    {

      /* accumlate the results */
@ -406,12 +406,12 @@ void arm_conv_opt_q7(
    blkCnt--;

    /* Store the result in the accumulator in the destination buffer. */
-    *pOut++ = (q7_t) (__SSAT(acc0 >> 7u, 8));
+    *pOut++ = (q7_t) (__SSAT(acc0 >> 7U, 8));

    /* Initialization of inputB pointer */
    pScr2 = py;

-    pScratch1 += 1u;
+    pScratch1 += 1U;

  }

--- a/Source/FilteringFunctions/arm_conv_partial_f32.c
+++ b/Source/FilteringFunctions/arm_conv_partial_f32.c
@ -103,13 +103,13 @@ arm_status arm_conv_partial_f32(
  float32_t *pSrc1, *pSrc2;                      /* Intermediate pointers */
  float32_t sum, acc0, acc1, acc2, acc3;         /* Accumulator */
  float32_t x0, x1, x2, x3, c0;                  /* Temporary variables to hold state and coefficient values */
-  uint32_t j, k, count = 0u, blkCnt, check;
+  uint32_t j, k, count = 0U, blkCnt, check;
  int32_t blockSize1, blockSize2, blockSize3;    /* loop counters */
  arm_status status;                             /* status of Partial convolution */


  /* Check for range of output samples to be calculated */
-  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
+  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
  {
    /* Set status as ARM_MATH_ARGUMENT_ERROR */
    status = ARM_MATH_ARGUMENT_ERROR;
@ -148,7 +148,7 @@ arm_status arm_conv_partial_f32(
    blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
    blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
    blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :
                                     (int32_t) numPoints) : 0;
    blockSize2 = ((int32_t) check - blockSize3) -
      (blockSize1 + (int32_t) firstIndex);
@ -181,7 +181,7 @@ arm_status arm_conv_partial_f32(
       The count variable holds the number of MAC operations performed.
       Since the partial convolution starts from from firstIndex
       Number of Macs to be performed is firstIndex + 1 */
-    count = 1u + firstIndex;
+    count = 1U + firstIndex;

    /* Working pointer of inputA */
    px = pIn1;
@ -201,11 +201,11 @@ arm_status arm_conv_partial_f32(
      sum = 0.0f;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = count >> 2u;
+      k = count >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-      while (k > 0u)
+      while (k > 0U)
      {
        /* x[0] * y[srcBLen - 1] */
        sum += *px++ * *py--;
@ -225,9 +225,9 @@ arm_status arm_conv_partial_f32(

      /* If the count is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = count % 0x4u;
+      k = count % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        sum += *px++ * *py--;
@ -271,11 +271,11 @@ arm_status arm_conv_partial_f32(
    }

    /* Working pointer of inputB */
-    pSrc2 = pIn2 + (srcBLen - 1u);
+    pSrc2 = pIn2 + (srcBLen - 1U);
    py = pSrc2;

    /* count is index by which the pointer pIn1 to be incremented */
-    count = 0u;
+    count = 0U;

    /* -------------------
     * Stage2 process
@ -284,12 +284,12 @@ arm_status arm_conv_partial_f32(
    /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
     * So, to loop unroll over blockSize2,
     * srcBLen should be greater than or equal to 4 */
-    if (srcBLen >= 4u)
+    if (srcBLen >= 4U)
    {
      /* Loop unroll over blockSize2, by 4 */
-      blkCnt = ((uint32_t) blockSize2 >> 2u);
+      blkCnt = ((uint32_t) blockSize2 >> 2U);

-      while (blkCnt > 0u)
+      while (blkCnt > 0U)
      {
        /* Set all accumulators to zero */
        acc0 = 0.0f;
@ -303,7 +303,7 @@ arm_status arm_conv_partial_f32(
        x2 = *(px++);

        /* Apply loop unrolling and compute 4 MACs simultaneously. */
-        k = srcBLen >> 2u;
+        k = srcBLen >> 2U;

        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
         ** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -381,9 +381,9 @@ arm_status arm_conv_partial_f32(

        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
         ** No loop unrolling is used. */
-        k = srcBLen % 0x4u;
+        k = srcBLen % 0x4U;

-        while (k > 0u)
+        while (k > 0U)
        {
          /* Read y[srcBLen - 5] sample */
          c0 = *(py--);
@ -417,7 +417,7 @@ arm_status arm_conv_partial_f32(
        *pOut++ = acc3;

        /* Increment the pointer pIn1 index, count by 1 */
-        count += 4u;
+        count += 4U;

        /* Update the inputA and inputB pointers for next MAC calculation */
        px = pIn1 + count;
@ -429,19 +429,19 @@ arm_status arm_conv_partial_f32(

      /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
       ** No loop unrolling is used. */
-      blkCnt = (uint32_t) blockSize2 % 0x4u;
+      blkCnt = (uint32_t) blockSize2 % 0x4U;

-      while (blkCnt > 0u)
+      while (blkCnt > 0U)
      {
        /* Accumulator is made zero for every iteration */
        sum = 0.0f;

        /* Apply loop unrolling and compute 4 MACs simultaneously. */
-        k = srcBLen >> 2u;
+        k = srcBLen >> 2U;

        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
         ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-        while (k > 0u)
+        while (k > 0U)
        {
          /* Perform the multiply-accumulates */
          sum += *px++ * *py--;
@ -455,9 +455,9 @@ arm_status arm_conv_partial_f32(

        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
         ** No loop unrolling is used. */
-        k = srcBLen % 0x4u;
+        k = srcBLen % 0x4U;

-        while (k > 0u)
+        while (k > 0U)
        {
          /* Perform the multiply-accumulate */
          sum += *px++ * *py--;
@ -486,7 +486,7 @@ arm_status arm_conv_partial_f32(
       * the blockSize2 loop cannot be unrolled by 4 */
      blkCnt = (uint32_t) blockSize2;

-      while (blkCnt > 0u)
+      while (blkCnt > 0U)
      {
        /* Accumulator is made zero for every iteration */
        sum = 0.0f;
@ -494,7 +494,7 @@ arm_status arm_conv_partial_f32(
        /* srcBLen number of MACS should be performed */
        k = srcBLen;

-        while (k > 0u)
+        while (k > 0U)
        {
          /* Perform the multiply-accumulate */
          sum += *px++ * *py--;
@ -532,14 +532,14 @@ arm_status arm_conv_partial_f32(

    /* In this stage the MAC operations are decreased by 1 for every iteration.
       The count variable holds the number of MAC operations performed */
-    count = srcBLen - 1u;
+    count = srcBLen - 1U;

    /* Working pointer of inputA */
-    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
    px = pSrc1;

    /* Working pointer of inputB */
-    pSrc2 = pIn2 + (srcBLen - 1u);
+    pSrc2 = pIn2 + (srcBLen - 1U);
    py = pSrc2;

    while (blockSize3 > 0)
@ -548,11 +548,11 @@ arm_status arm_conv_partial_f32(
      sum = 0.0f;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = count >> 2u;
+      k = count >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-      while (k > 0u)
+      while (k > 0U)
      {
        /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */
        sum += *px++ * *py--;
@ -572,9 +572,9 @@ arm_status arm_conv_partial_f32(

      /* If the count is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = count % 0x4u;
+      k = count % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        /* sum +=  x[srcALen-1] * y[srcBLen-1] */
@ -617,7 +617,7 @@ arm_status arm_conv_partial_f32(
  arm_status status;                             /* status of Partial convolution */

  /* Check for range of output samples to be calculated */
-  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
+  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
  {
    /* Set status as ARM_ARGUMENT_ERROR */
    status = ARM_MATH_ARGUMENT_ERROR;
@ -631,7 +631,7 @@ arm_status arm_conv_partial_f32(
      sum = 0.0f;

      /* Loop to perform MAC operations according to convolution equation */
-      for (j = 0u; j <= i; j++)
+      for (j = 0U; j <= i; j++)
      {
        /* Check the array limitations for inputs */
        if ((((i - j) < srcBLen) && (j < srcALen)))
--- a/Source/FilteringFunctions/arm_conv_partial_fast_opt_q15.c
+++ b/Source/FilteringFunctions/arm_conv_partial_fast_opt_q15.c
@ -88,7 +88,7 @@ arm_status arm_conv_partial_fast_opt_q15(
  uint32_t tapCnt;                               /* loop count */

  /* Check for range of output samples to be calculated */
-  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
+  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
  {
    /* Set status as ARM_MATH_ARGUMENT_ERROR */
    status = ARM_MATH_ARGUMENT_ERROR;
@ -131,13 +131,13 @@ arm_status arm_conv_partial_fast_opt_q15(
    px = pIn2;

    /* Apply loop unrolling and do 4 Copies simultaneously. */
-    k = srcBLen >> 2u;
+    k = srcBLen >> 2U;

    /* First part of the processing with loop unrolling copies 4 data points at a time.
     ** a second loop below copies for the remaining 1 to 3 samples. */

    /* Copy smaller length input sequence in reverse order into second scratch buffer */
-    while (k > 0u)
+    while (k > 0U)
    {
      /* copy second buffer in reversal manner */
      *pScr2-- = *px++;
@ -151,9 +151,9 @@ arm_status arm_conv_partial_fast_opt_q15(

    /* If the count is not a multiple of 4, copy remaining samples here.
     ** No loop unrolling is used. */
-    k = srcBLen % 0x4u;
+    k = srcBLen % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* copy second buffer in reversal manner for remaining samples */
      *pScr2-- = *px++;
@ -166,11 +166,11 @@ arm_status arm_conv_partial_fast_opt_q15(
    pScr1 = pScratch1;

    /* Assuming scratch1 buffer is aligned by 32-bit */
-    /* Fill (srcBLen - 1u) zeros in scratch buffer */
-    arm_fill_q15(0, pScr1, (srcBLen - 1u));
+    /* Fill (srcBLen - 1U) zeros in scratch buffer */
+    arm_fill_q15(0, pScr1, (srcBLen - 1U));

    /* Update temporary scratch pointer */
-    pScr1 += (srcBLen - 1u);
+    pScr1 += (srcBLen - 1U);

    /* Copy bigger length sequence(srcALen) samples in scratch1 buffer */

@ -180,11 +180,11 @@ arm_status arm_conv_partial_fast_opt_q15(
    /* Update pointers */
    pScr1 += srcALen;

-    /* Fill (srcBLen - 1u) zeros at end of scratch buffer */
-    arm_fill_q15(0, pScr1, (srcBLen - 1u));
+    /* Fill (srcBLen - 1U) zeros at end of scratch buffer */
+    arm_fill_q15(0, pScr1, (srcBLen - 1U));

    /* Update pointer */
-    pScr1 += (srcBLen - 1u);
+    pScr1 += (srcBLen - 1U);

    /* Initialization of pIn2 pointer */
    pIn2 = py;
@ -216,14 +216,14 @@ arm_status arm_conv_partial_fast_opt_q15(
      /* Read next two samples from scratch1 buffer */
      x2 = *__SIMD32(pScr1)++;

-      tapCnt = (srcBLen) >> 2u;
+      tapCnt = (srcBLen) >> 2U;

-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {

        /* Read four samples from smaller buffer */
        y1 = _SIMD32_OFFSET(pIn2);
-        y2 = _SIMD32_OFFSET(pIn2 + 2u);
+        y2 = _SIMD32_OFFSET(pIn2 + 2U);

        /* multiply and accumlate */
        acc0 = __SMLAD(x1, y1, acc0);
@ -257,7 +257,7 @@ arm_status arm_conv_partial_fast_opt_q15(
        acc3 = __SMLADX(x3, y1, acc3);
        acc1 = __SMLADX(x3, y2, acc1);

-        x2 = _SIMD32_OFFSET(pScr1 + 2u);
+        x2 = _SIMD32_OFFSET(pScr1 + 2U);

 #ifndef ARM_MATH_BIG_ENDIAN
        x3 = __PKHBT(x2, x1, 0);
@ -268,8 +268,8 @@ arm_status arm_conv_partial_fast_opt_q15(
        acc3 = __SMLADX(x3, y2, acc3);

        /* update scratch pointers */
-        pIn2 += 4u;
-        pScr1 += 4u;
+        pIn2 += 4U;
+        pScr1 += 4U;


        /* Decrement the loop counter */
@ -277,12 +277,12 @@ arm_status arm_conv_partial_fast_opt_q15(
      }

      /* Update scratch pointer for remaining samples of smaller length sequence */
-      pScr1 -= 4u;
+      pScr1 -= 4U;

      /* apply same above for remaining samples of smaller length sequence */
-      tapCnt = (srcBLen) & 3u;
+      tapCnt = (srcBLen) & 3U;

-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {

        /* accumlate the results */
@ -291,7 +291,7 @@ arm_status arm_conv_partial_fast_opt_q15(
        acc2 += (*pScr1++ * *pIn2);
        acc3 += (*pScr1++ * *pIn2++);

-        pScr1 -= 3u;
+        pScr1 -= 3U;

        /* Decrement the loop counter */
        tapCnt--;
@ -321,7 +321,7 @@ arm_status arm_conv_partial_fast_opt_q15(
      /* Initialization of inputB pointer */
      pIn2 = py;

-      pScratch1 += 4u;
+      pScratch1 += 4U;

    }

@ -337,9 +337,9 @@ arm_status arm_conv_partial_fast_opt_q15(
      /* Clear Accumlators */
      acc0 = 0;

-      tapCnt = (srcBLen) >> 1u;
+      tapCnt = (srcBLen) >> 1U;

-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {

        /* Read next two samples from scratch1 buffer */
@ -354,10 +354,10 @@ arm_status arm_conv_partial_fast_opt_q15(
        tapCnt--;
      }

-      tapCnt = (srcBLen) & 1u;
+      tapCnt = (srcBLen) & 1U;

      /* apply same above for remaining samples of smaller length sequence */
-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {

        /* accumlate the results */
@ -376,7 +376,7 @@ arm_status arm_conv_partial_fast_opt_q15(
      /* Initialization of inputB pointer */
      pIn2 = py;

-      pScratch1 += 1u;
+      pScratch1 += 1U;

    }
    /* set status as ARM_MATH_SUCCESS */
@ -416,7 +416,7 @@ arm_status arm_conv_partial_fast_opt_q15(


  /* Check for range of output samples to be calculated */
-  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
+  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
  {
    /* Set status as ARM_MATH_ARGUMENT_ERROR */
    status = ARM_MATH_ARGUMENT_ERROR;
@ -459,11 +459,11 @@ arm_status arm_conv_partial_fast_opt_q15(
    px = pIn2;

    /* Apply loop unrolling and do 4 Copies simultaneously. */
-    k = srcBLen >> 2u;
+    k = srcBLen >> 2U;

    /* First part of the processing with loop unrolling copies 4 data points at a time.
     ** a second loop below copies for the remaining 1 to 3 samples. */
-    while (k > 0u)
+    while (k > 0U)
    {
      /* copy second buffer in reversal manner */
      *pScr2-- = *px++;
@ -477,9 +477,9 @@ arm_status arm_conv_partial_fast_opt_q15(

    /* If the count is not a multiple of 4, copy remaining samples here.
     ** No loop unrolling is used. */
-    k = srcBLen % 0x4u;
+    k = srcBLen % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* copy second buffer in reversal manner for remaining samples */
      *pScr2-- = *px++;
@ -491,21 +491,21 @@ arm_status arm_conv_partial_fast_opt_q15(
    /* Initialze temporary scratch pointer */
    pScr1 = pScratch1;

-    /* Fill (srcBLen - 1u) zeros in scratch buffer */
-    arm_fill_q15(0, pScr1, (srcBLen - 1u));
+    /* Fill (srcBLen - 1U) zeros in scratch buffer */
+    arm_fill_q15(0, pScr1, (srcBLen - 1U));

    /* Update temporary scratch pointer */
-    pScr1 += (srcBLen - 1u);
+    pScr1 += (srcBLen - 1U);

    /* Copy bigger length sequence(srcALen) samples in scratch1 buffer */


    /* Apply loop unrolling and do 4 Copies simultaneously. */
-    k = srcALen >> 2u;
+    k = srcALen >> 2U;

    /* First part of the processing with loop unrolling copies 4 data points at a time.
     ** a second loop below copies for the remaining 1 to 3 samples. */
-    while (k > 0u)
+    while (k > 0U)
    {
      /* copy second buffer in reversal manner */
      *pScr1++ = *pIn1++;
@ -519,9 +519,9 @@ arm_status arm_conv_partial_fast_opt_q15(

    /* If the count is not a multiple of 4, copy remaining samples here.
     ** No loop unrolling is used. */
-    k = srcALen % 0x4u;
+    k = srcALen % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* copy second buffer in reversal manner for remaining samples */
      *pScr1++ = *pIn1++;
@ -532,11 +532,11 @@ arm_status arm_conv_partial_fast_opt_q15(


    /* Apply loop unrolling and do 4 Copies simultaneously. */
-    k = (srcBLen - 1u) >> 2u;
+    k = (srcBLen - 1U) >> 2U;

    /* First part of the processing with loop unrolling copies 4 data points at a time.
     ** a second loop below copies for the remaining 1 to 3 samples. */
-    while (k > 0u)
+    while (k > 0U)
    {
      /* copy second buffer in reversal manner */
      *pScr1++ = 0;
@ -550,9 +550,9 @@ arm_status arm_conv_partial_fast_opt_q15(

    /* If the count is not a multiple of 4, copy remaining samples here.
     ** No loop unrolling is used. */
-    k = (srcBLen - 1u) % 0x4u;
+    k = (srcBLen - 1U) % 0x4U;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* copy second buffer in reversal manner for remaining samples */
      *pScr1++ = 0;
@ -591,14 +591,14 @@ arm_status arm_conv_partial_fast_opt_q15(
      x20 = *pScr1++;
      x21 = *pScr1++;

-      tapCnt = (srcBLen) >> 2u;
+      tapCnt = (srcBLen) >> 2U;

-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {

        /* Read two samples from smaller buffer */
        y10 = *pIn2;
-        y11 = *(pIn2 + 1u);
+        y11 = *(pIn2 + 1U);

        /* multiply and accumlate */
        acc0 += (q31_t) x10 *y10;
@ -612,15 +612,15 @@ arm_status arm_conv_partial_fast_opt_q15(

        /* Read next two samples from scratch1 buffer */
        x10 = *pScr1;
-        x11 = *(pScr1 + 1u);
+        x11 = *(pScr1 + 1U);

        /* multiply and accumlate */
        acc3 += (q31_t) x21 *y10;
        acc3 += (q31_t) x10 *y11;

        /* Read next two samples from scratch2 buffer */
-        y10 = *(pIn2 + 2u);
-        y11 = *(pIn2 + 3u);
+        y10 = *(pIn2 + 2U);
+        y11 = *(pIn2 + 3U);

        /* multiply and accumlate */
        acc0 += (q31_t) x20 *y10;
@ -639,20 +639,20 @@ arm_status arm_conv_partial_fast_opt_q15(
        acc3 += (q31_t) x20 *y11;

        /* update scratch pointers */
-        pIn2 += 4u;
-        pScr1 += 4u;
+        pIn2 += 4U;
+        pScr1 += 4U;

        /* Decrement the loop counter */
        tapCnt--;
      }

      /* Update scratch pointer for remaining samples of smaller length sequence */
-      pScr1 -= 4u;
+      pScr1 -= 4U;

      /* apply same above for remaining samples of smaller length sequence */
-      tapCnt = (srcBLen) & 3u;
+      tapCnt = (srcBLen) & 3U;

-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {
        /* accumlate the results */
        acc0 += (*pScr1++ * *pIn2);
@ -660,7 +660,7 @@ arm_status arm_conv_partial_fast_opt_q15(
        acc2 += (*pScr1++ * *pIn2);
        acc3 += (*pScr1++ * *pIn2++);

-        pScr1 -= 3u;
+        pScr1 -= 3U;

        /* Decrement the loop counter */
        tapCnt--;
@ -678,7 +678,7 @@ arm_status arm_conv_partial_fast_opt_q15(
      /* Initialization of inputB pointer */
      pIn2 = py;

-      pScratch1 += 4u;
+      pScratch1 += 4U;

    }

@ -694,9 +694,9 @@ arm_status arm_conv_partial_fast_opt_q15(
      /* Clear Accumlators */
      acc0 = 0;

-      tapCnt = (srcBLen) >> 1u;
+      tapCnt = (srcBLen) >> 1U;

-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {

        /* Read next two samples from scratch1 buffer */
@ -715,10 +715,10 @@ arm_status arm_conv_partial_fast_opt_q15(
        tapCnt--;
      }

-      tapCnt = (srcBLen) & 1u;
+      tapCnt = (srcBLen) & 1U;

      /* apply same above for remaining samples of smaller length sequence */
-      while (tapCnt > 0u)
+      while (tapCnt > 0U)
      {

        /* accumlate the results */
@ -736,7 +736,7 @@ arm_status arm_conv_partial_fast_opt_q15(
      /* Initialization of inputB pointer */
      pIn2 = py;

-      pScratch1 += 1u;
+      pScratch1 += 1U;

    }

--- a/Source/FilteringFunctions/arm_conv_partial_fast_q15.c
+++ b/Source/FilteringFunctions/arm_conv_partial_fast_q15.c
@ -76,7 +76,7 @@ arm_status arm_conv_partial_fast_q15(
  arm_status status;                             /* status of Partial convolution */

  /* Check for range of output samples to be calculated */
-  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
+  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
  {
    /* Set status as ARM_MATH_ARGUMENT_ERROR */
    status = ARM_MATH_ARGUMENT_ERROR;
@ -115,7 +115,7 @@ arm_status arm_conv_partial_fast_q15(
    blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
    blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
    blockSize1 = (((int32_t) srcBLen - 1) - (int32_t) firstIndex);
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :
                                     (int32_t) numPoints) : 0;
    blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) +
                                    (int32_t) firstIndex);
@ -148,7 +148,7 @@ arm_status arm_conv_partial_fast_q15(
       The count variable holds the number of MAC operations performed.
       Since the partial convolution starts from firstIndex
       Number of Macs to be performed is firstIndex + 1 */
-    count = 1u + firstIndex;
+    count = 1U + firstIndex;

    /* Working pointer of inputA */
    px = pIn1;
@ -166,7 +166,7 @@ arm_status arm_conv_partial_fast_q15(
    /* Second part of this stage computes the MAC operations greater than or equal to 4 */

    /* The first part of the stage starts here */
-    while ((count < 4u) && (blockSize1 > 0))
+    while ((count < 4U) && (blockSize1 > 0))
    {
      /* Accumulator is made zero for every iteration */
      sum = 0;
@ -175,7 +175,7 @@ arm_status arm_conv_partial_fast_q15(
       * inputA samples and inputB samples */
      k = count;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        sum = __SMLAD(*px++, *py--, sum);
@ -210,11 +210,11 @@ arm_status arm_conv_partial_fast_q15(
      sum = 0;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = count >> 2u;
+      k = count >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        /* x[0], x[1] are multiplied with y[srcBLen - 1], y[srcBLen - 2] respectively */
@ -228,13 +228,13 @@ arm_status arm_conv_partial_fast_q15(

      /* For the next MAC operations, the pointer py is used without SIMD
       * So, py is incremented by 1 */
-      py = py + 1u;
+      py = py + 1U;

      /* If the count is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = count % 0x4u;
+      k = count % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        sum = __SMLAD(*px++, *py--, sum);
@ -247,7 +247,7 @@ arm_status arm_conv_partial_fast_q15(
      *pOut++ = (q15_t) (sum >> 15);

      /* Update the inputA and inputB pointers for next MAC calculation */
-      py = ++pSrc2 - 1u;
+      py = ++pSrc2 - 1U;
      px = pIn1;

      /* Increment the MAC count */
@ -278,11 +278,11 @@ arm_status arm_conv_partial_fast_q15(
    }

    /* Working pointer of inputB */
-    pSrc2 = pIn2 + (srcBLen - 1u);
+    pSrc2 = pIn2 + (srcBLen - 1U);
    py = pSrc2;

    /* count is the index by which the pointer pIn1 to be incremented */
-    count = 0u;
+    count = 0U;


    /* --------------------
@ -292,14 +292,14 @@ arm_status arm_conv_partial_fast_q15(
    /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
     * So, to loop unroll over blockSize2,
     * srcBLen should be greater than or equal to 4 */
-    if (srcBLen >= 4u)
+    if (srcBLen >= 4U)
    {
      /* Loop unroll over blockSize2, by 4 */
-      blkCnt = ((uint32_t) blockSize2 >> 2u);
+      blkCnt = ((uint32_t) blockSize2 >> 2U);

-      while (blkCnt > 0u)
+      while (blkCnt > 0U)
      {
-      py = py - 1u;
+      py = py - 1U;

        /* Set all accumulators to zero */
        acc0 = 0;
@ -312,11 +312,11 @@ arm_status arm_conv_partial_fast_q15(
      x0 = *__SIMD32(px);
        /* read x[1], x[2] samples */
      x1 = _SIMD32_OFFSET(px+1);
-	  px+= 2u;
+	  px+= 2U;


        /* Apply loop unrolling and compute 4 MACs simultaneously. */
-        k = srcBLen >> 2u;
+        k = srcBLen >> 2U;

        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
         ** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -358,7 +358,7 @@ arm_status arm_conv_partial_fast_q15(

          /* Read x[5], x[6] */
        x1 = _SIMD32_OFFSET(px+3);
-		px += 4u;
+		px += 4U;

          /* acc2 +=  x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
          acc2 = __SMLADX(x0, c0, acc2);
@ -373,15 +373,15 @@ arm_status arm_conv_partial_fast_q15(

        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
         ** No loop unrolling is used. */
-        k = srcBLen % 0x4u;
+        k = srcBLen % 0x4U;

-        if (k == 1u)
+        if (k == 1U)
        {
          /* Read y[srcBLen - 5] */
        c0 = *(py+1);
 #ifdef  ARM_MATH_BIG_ENDIAN

-        c0 = c0 << 16u;
+        c0 = c0 << 16U;

 #else

@ -400,7 +400,7 @@ arm_status arm_conv_partial_fast_q15(
          acc3 = __SMLADX(x3, c0, acc3);
        }

-        if (k == 2u)
+        if (k == 2U)
        {
          /* Read y[srcBLen - 5], y[srcBLen - 6] */
        c0 = _SIMD32_OFFSET(py);
@ -410,7 +410,7 @@ arm_status arm_conv_partial_fast_q15(

        /* Read x[9] */
        x2 = _SIMD32_OFFSET(px+1);
-		px += 2u;
+		px += 2U;

          /* Perform the multiply-accumulates */
          acc0 = __SMLADX(x0, c0, acc0);
@ -419,7 +419,7 @@ arm_status arm_conv_partial_fast_q15(
          acc3 = __SMLADX(x2, c0, acc3);
        }

-        if (k == 3u)
+        if (k == 3U)
        {
          /* Read y[srcBLen - 5], y[srcBLen - 6] */
        c0 = _SIMD32_OFFSET(py);
@ -439,7 +439,7 @@ arm_status arm_conv_partial_fast_q15(
 		c0 = *(py-1);
 #ifdef  ARM_MATH_BIG_ENDIAN

-        c0 = c0 << 16u;
+        c0 = c0 << 16U;
 #else

        c0 = c0 & 0x0000FFFF;
@ -447,7 +447,7 @@ arm_status arm_conv_partial_fast_q15(

          /* Read x[10] */
        x3 =  _SIMD32_OFFSET(px+2);
-		px += 3u;
+		px += 3U;

          /* Perform the multiply-accumulates */
          acc0 = __SMLADX(x1, c0, acc0);
@ -470,7 +470,7 @@ arm_status arm_conv_partial_fast_q15(
 #endif /*      #ifndef  ARM_MATH_BIG_ENDIAN    */

        /* Increment the pointer pIn1 index, count by 4 */
-        count += 4u;
+        count += 4U;

        /* Update the inputA and inputB pointers for next MAC calculation */
        px = pIn1 + count;
@ -482,19 +482,19 @@ arm_status arm_conv_partial_fast_q15(

      /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
       ** No loop unrolling is used. */
-      blkCnt = (uint32_t) blockSize2 % 0x4u;
+      blkCnt = (uint32_t) blockSize2 % 0x4U;

-      while (blkCnt > 0u)
+      while (blkCnt > 0U)
      {
        /* Accumulator is made zero for every iteration */
        sum = 0;

        /* Apply loop unrolling and compute 4 MACs simultaneously. */
-        k = srcBLen >> 2u;
+        k = srcBLen >> 2U;

        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
         ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-        while (k > 0u)
+        while (k > 0U)
        {
          /* Perform the multiply-accumulates */
          sum += ((q31_t) * px++ * *py--);
@ -508,9 +508,9 @@ arm_status arm_conv_partial_fast_q15(

        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
         ** No loop unrolling is used. */
-        k = srcBLen % 0x4u;
+        k = srcBLen % 0x4U;

-        while (k > 0u)
+        while (k > 0U)
        {
          /* Perform the multiply-accumulates */
          sum += ((q31_t) * px++ * *py--);
@ -539,7 +539,7 @@ arm_status arm_conv_partial_fast_q15(
       * the blockSize2 loop cannot be unrolled by 4 */
      blkCnt = (uint32_t) blockSize2;

-      while (blkCnt > 0u)
+      while (blkCnt > 0U)
      {
        /* Accumulator is made zero for every iteration */
        sum = 0;
@ -547,7 +547,7 @@ arm_status arm_conv_partial_fast_q15(
        /* srcBLen number of MACS should be performed */
        k = srcBLen;

-        while (k > 0u)
+        while (k > 0U)
        {
          /* Perform the multiply-accumulate */
          sum += ((q31_t) * px++ * *py--);
@ -585,15 +585,15 @@ arm_status arm_conv_partial_fast_q15(

    /* In this stage the MAC operations are decreased by 1 for every iteration.
       The count variable holds the number of MAC operations performed */
-    count = srcBLen - 1u;
+    count = srcBLen - 1U;

    /* Working pointer of inputA */
-    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
    px = pSrc1;

    /* Working pointer of inputB */
-    pSrc2 = pIn2 + (srcBLen - 1u);
-    pIn2 = pSrc2 - 1u;
+    pSrc2 = pIn2 + (srcBLen - 1U);
+    pIn2 = pSrc2 - 1U;
    py = pIn2;

    /* -------------------
@ -605,19 +605,19 @@ arm_status arm_conv_partial_fast_q15(
    /* Second part of this stage computes the MAC operations less than or equal to 4 */

    /* The first part of the stage starts here */
-    j = count >> 2u;
+    j = count >> 2U;

-    while ((j > 0u) && (blockSize3 > 0))
+    while ((j > 0U) && (blockSize3 > 0))
    {
      /* Accumulator is made zero for every iteration */
      sum = 0;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = count >> 2u;
+      k = count >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-      while (k > 0u)
+      while (k > 0U)
      {
        /* x[srcALen - srcBLen + 1], x[srcALen - srcBLen + 2] are multiplied
         * with y[srcBLen - 1], y[srcBLen - 2] respectively */
@ -632,13 +632,13 @@ arm_status arm_conv_partial_fast_q15(

      /* For the next MAC operations, the pointer py is used without SIMD
       * So, py is incremented by 1 */
-      py = py + 1u;
+      py = py + 1U;

      /* If the count is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = count % 0x4u;
+      k = count % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* sum += x[srcALen - srcBLen + 5] * y[srcBLen - 5] */
        sum = __SMLAD(*px++, *py--, sum);
@ -666,7 +666,7 @@ arm_status arm_conv_partial_fast_q15(
    /* The second part of the stage starts here */
    /* SIMD is not used for the next MAC operations,
     * so pointer py is updated to read only one sample at a time */
-    py = py + 1u;
+    py = py + 1U;

    while (blockSize3 > 0)
    {
@ -676,7 +676,7 @@ arm_status arm_conv_partial_fast_q15(
      /* Apply loop unrolling and compute 4 MACs simultaneously. */
      k = count;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        /* sum +=  x[srcALen-1] * y[srcBLen-1] */
@ -723,7 +723,7 @@ arm_status arm_conv_partial_fast_q15(
  q15_t a, b;

  /* Check for range of output samples to be calculated */
-  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
+  if ((firstIndex + numPoints) > ((srcALen + (srcBLen - 1U))))
  {
    /* Set status as ARM_MATH_ARGUMENT_ERROR */
    status = ARM_MATH_ARGUMENT_ERROR;
@ -762,7 +762,7 @@ arm_status arm_conv_partial_fast_q15(
    blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
    blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
    blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :
                                     (int32_t) numPoints) : 0;
    blockSize2 = ((int32_t) check - blockSize3) -
      (blockSize1 + (int32_t) firstIndex);
@ -795,7 +795,7 @@ arm_status arm_conv_partial_fast_q15(
       The count variable holds the number of MAC operations performed.
       Since the partial convolution starts from firstIndex
       Number of Macs to be performed is firstIndex + 1 */
-    count = 1u + firstIndex;
+    count = 1U + firstIndex;

    /* Working pointer of inputA */
    px = pIn1;
@ -813,7 +813,7 @@ arm_status arm_conv_partial_fast_q15(
    /* Second part of this stage computes the MAC operations greater than or equal to 4 */

    /* The first part of the stage starts here */
-  while ((count < 4u) && (blockSize1 > 0))
+  while ((count < 4U) && (blockSize1 > 0))
    {
      /* Accumulator is made zero for every iteration */
      sum = 0;
@ -822,7 +822,7 @@ arm_status arm_conv_partial_fast_q15(
       * inputA samples and inputB samples */
      k = count;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
      sum += ((q31_t) * px++ * *py--);
@ -857,13 +857,13 @@ arm_status arm_conv_partial_fast_q15(
      sum = 0;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = count >> 2u;
+      k = count >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
 	py++;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
        sum += ((q31_t) * px++ * *py--);
@ -877,9 +877,9 @@ arm_status arm_conv_partial_fast_q15(

      /* If the count is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = count % 0x4u;
+      k = count % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
      sum += ((q31_t) * px++ * *py--);
@ -892,7 +892,7 @@ arm_status arm_conv_partial_fast_q15(
      *pOut++ = (q15_t) (sum >> 15);

      /* Update the inputA and inputB pointers for next MAC calculation */
-      py = ++pSrc2 - 1u;
+      py = ++pSrc2 - 1U;
      px = pIn1;

      /* Increment the MAC count */
@ -923,11 +923,11 @@ arm_status arm_conv_partial_fast_q15(
    }

    /* Working pointer of inputB */
-    pSrc2 = pIn2 + (srcBLen - 1u);
+    pSrc2 = pIn2 + (srcBLen - 1U);
    py = pSrc2;

    /* count is the index by which the pointer pIn1 to be incremented */
-    count = 0u;
+    count = 0U;


    /* --------------------
@ -937,14 +937,14 @@ arm_status arm_conv_partial_fast_q15(
    /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
     * So, to loop unroll over blockSize2,
     * srcBLen should be greater than or equal to 4 */
-    if (srcBLen >= 4u)
+    if (srcBLen >= 4U)
    {
      /* Loop unroll over blockSize2, by 4 */
-      blkCnt = ((uint32_t) blockSize2 >> 2u);
+      blkCnt = ((uint32_t) blockSize2 >> 2U);

-      while (blkCnt > 0u)
+      while (blkCnt > 0U)
      {
-      py = py - 1u;
+      py = py - 1U;

        /* Set all accumulators to zero */
        acc0 = 0;
@ -971,7 +971,7 @@ arm_status arm_conv_partial_fast_q15(
 #endif	/*	#ifndef ARM_MATH_BIG_ENDIAN	   */

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = srcBLen >> 2u;
+      k = srcBLen >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
@ -1061,7 +1061,7 @@ arm_status arm_conv_partial_fast_q15(

 #endif	/*	#ifndef ARM_MATH_BIG_ENDIAN	   */

-		px += 4u;
+		px += 4U;

        /* acc2 +=  x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
        acc2 = __SMLADX(x0, c0, acc2);
@ -1076,16 +1076,16 @@ arm_status arm_conv_partial_fast_q15(

      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = srcBLen % 0x4u;
+      k = srcBLen % 0x4U;

-      if (k == 1u)
+      if (k == 1U)
      {
        /* Read y[srcBLen - 5] */
        c0 = *(py+1);

 #ifdef  ARM_MATH_BIG_ENDIAN

-        c0 = c0 << 16u;
+        c0 = c0 << 16U;

 #else

@ -1116,7 +1116,7 @@ arm_status arm_conv_partial_fast_q15(
        acc3 = __SMLADX(x3, c0, acc3);
      }

-      if (k == 2u)
+      if (k == 2U)
      {
        /* Read y[srcBLen - 5], y[srcBLen - 6] */
 		a = *py;
@ -1149,7 +1149,7 @@ arm_status arm_conv_partial_fast_q15(
 	  x2 = __PKHBT(a, b, 16);

 #endif	/*	#ifndef ARM_MATH_BIG_ENDIAN	   */
-		px += 2u;
+		px += 2U;

        /* Perform the multiply-accumulates */
        acc0 = __SMLADX(x0, c0, acc0);
@ -1158,7 +1158,7 @@ arm_status arm_conv_partial_fast_q15(
        acc3 = __SMLADX(x2, c0, acc3);
      }

-      if (k == 3u)
+      if (k == 3U)
      {
        /* Read y[srcBLen - 5], y[srcBLen - 6] */
 		a = *py;
@ -1202,7 +1202,7 @@ arm_status arm_conv_partial_fast_q15(
 		c0 = *(py-1);
 #ifdef  ARM_MATH_BIG_ENDIAN

-        c0 = c0 << 16u;
+        c0 = c0 << 16U;
 #else

        c0 = c0 & 0x0000FFFF;
@ -1222,7 +1222,7 @@ arm_status arm_conv_partial_fast_q15(

 #endif	/*	#ifndef ARM_MATH_BIG_ENDIAN	*/

-		px += 3u;
+		px += 3U;

        /* Perform the multiply-accumulates */
        acc0 = __SMLADX(x1, c0, acc0);
@ -1238,7 +1238,7 @@ arm_status arm_conv_partial_fast_q15(
 	  *pOut++ = (q15_t)(acc3 >> 15);

        /* Increment the pointer pIn1 index, count by 4 */
-        count += 4u;
+        count += 4U;

        /* Update the inputA and inputB pointers for next MAC calculation */
        px = pIn1 + count;
@ -1250,19 +1250,19 @@ arm_status arm_conv_partial_fast_q15(

      /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
       ** No loop unrolling is used. */
-      blkCnt = (uint32_t) blockSize2 % 0x4u;
+      blkCnt = (uint32_t) blockSize2 % 0x4U;

-      while (blkCnt > 0u)
+      while (blkCnt > 0U)
      {
        /* Accumulator is made zero for every iteration */
        sum = 0;

        /* Apply loop unrolling and compute 4 MACs simultaneously. */
-        k = srcBLen >> 2u;
+        k = srcBLen >> 2U;

        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
         ** a second loop below computes MACs for the remaining 1 to 3 samples. */
-        while (k > 0u)
+        while (k > 0U)
        {
          /* Perform the multiply-accumulates */
          sum += ((q31_t) * px++ * *py--);
@ -1276,9 +1276,9 @@ arm_status arm_conv_partial_fast_q15(

        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
         ** No loop unrolling is used. */
-        k = srcBLen % 0x4u;
+        k = srcBLen % 0x4U;

-        while (k > 0u)
+        while (k > 0U)
        {
          /* Perform the multiply-accumulates */
          sum += ((q31_t) * px++ * *py--);
@ -1307,7 +1307,7 @@ arm_status arm_conv_partial_fast_q15(
       * the blockSize2 loop cannot be unrolled by 4 */
      blkCnt = (uint32_t) blockSize2;

-      while (blkCnt > 0u)
+      while (blkCnt > 0U)
      {
        /* Accumulator is made zero for every iteration */
        sum = 0;
@ -1315,7 +1315,7 @@ arm_status arm_conv_partial_fast_q15(
        /* srcBLen number of MACS should be performed */
        k = srcBLen;

-        while (k > 0u)
+        while (k > 0U)
        {
          /* Perform the multiply-accumulate */
          sum += ((q31_t) * px++ * *py--);
@ -1353,15 +1353,15 @@ arm_status arm_conv_partial_fast_q15(

    /* In this stage the MAC operations are decreased by 1 for every iteration.
       The count variable holds the number of MAC operations performed */
-    count = srcBLen - 1u;
+    count = srcBLen - 1U;

    /* Working pointer of inputA */
-    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1U);
    px = pSrc1;

    /* Working pointer of inputB */
-    pSrc2 = pIn2 + (srcBLen - 1u);
-    pIn2 = pSrc2 - 1u;
+    pSrc2 = pIn2 + (srcBLen - 1U);
+    pIn2 = pSrc2 - 1U;
    py = pIn2;

    /* -------------------
@ -1373,21 +1373,21 @@ arm_status arm_conv_partial_fast_q15(
    /* Second part of this stage computes the MAC operations less than or equal to 4 */

    /* The first part of the stage starts here */
-    j = count >> 2u;
+    j = count >> 2U;

-    while ((j > 0u) && (blockSize3 > 0))
+    while ((j > 0U) && (blockSize3 > 0))
    {
      /* Accumulator is made zero for every iteration */
      sum = 0;

      /* Apply loop unrolling and compute 4 MACs simultaneously. */
-      k = count >> 2u;
+      k = count >> 2U;

      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
 	py++;

-    while (k > 0u)
+    while (k > 0U)
    {
      /* Perform the multiply-accumulates */
        sum += ((q31_t) * px++ * *py--);
@ -1401,9 +1401,9 @@ arm_status arm_conv_partial_fast_q15(

      /* If the count is not a multiple of 4, compute any remaining MACs here.
       ** No loop unrolling is used. */
-      k = count % 0x4u;
+      k = count % 0x4U;

-      while (k > 0u)
+      while (k > 0U)
      {
      /* Perform the multiply-accumulates */
        sum += ((q31_t) * px++ * *py--);
@ -1431,7 +1431,7 @@ arm_status arm_conv_partial_fast_q15(
    /* The second part of the stage starts here */
    /* SIMD is not used for the next MAC operations,
     * so pointer py is updated to read only one sample at a time */
-    py = py + 1u;
+    py = py + 1U;

  while (blockSize3 > 0)
    {
@ -1441,7 +1441,7 @@ arm_status arm_conv_partial_fast_q15(
      /* Apply loop unrolling and compute 4 MACs simultaneously. */
      k = count;

-      while (k > 0u)
+      while (k > 0U)
      {
        /* Perform the multiply-accumulates */
        /* sum +=  x[srcALen-1] * y[srcBLen-1] */
--- a/Show More
+++ b/Show More