CMSIS-DSP: Remove some gcc compilation warnings.

5 years ago · 4014866174
parent acaa70a62a
commit 4014866174
27 changed files with 128 additions and 95 deletions
--- a/Examples/ARM/arm_bayes_example/arm_bayes_example_f32.c
+++ b/Examples/ARM/arm_bayes_example/arm_bayes_example_f32.c
@ -92,6 +92,7 @@ int32_t main(void)

  /* Result of the classifier */
  float32_t result[NB_OF_CLASSES];
+  float32_t temp[NB_OF_CLASSES];
  float32_t maxProba;
  uint32_t index;
  
@ -105,7 +106,7 @@ int32_t main(void)
  in[0] = 1.5f;
  in[1] = 1.0f;

-  index = arm_gaussian_naive_bayes_predict_f32(&S, in, result);
+  index = arm_gaussian_naive_bayes_predict_f32(&S, in, result,temp);

  maxProba = result[index];

@ -116,7 +117,7 @@ int32_t main(void)
  in[0] = -1.5f;
  in[1] = 1.0f;

-  index = arm_gaussian_naive_bayes_predict_f32(&S, in, result);
+  index = arm_gaussian_naive_bayes_predict_f32(&S, in, result,temp);

  maxProba = result[index];

@ -127,7 +128,7 @@ int32_t main(void)
  in[0] = 0.0f;
  in[1] = -3.0f;

-  index = arm_gaussian_naive_bayes_predict_f32(&S, in, result);
+  index = arm_gaussian_naive_bayes_predict_f32(&S, in, result,temp);

  maxProba = result[index];

--- a/Include/dsp/bayes_functions.h
+++ b/Include/dsp/bayes_functions.h
@ -69,7 +69,8 @@ typedef struct
 *
 * @param[in]  S                        points to a naive bayes instance structure
 * @param[in]  in                       points to the elements of the input vector.
- * @param[in]  pBuffer   points to a buffer of length numberOfClasses
+ * @param[out] pOutputProbabilities     points to a buffer of length numberOfClasses containing estimated probabilities
+ * @param[out] pBufferB                 points to a temporary buffer of length numberOfClasses
 * @return The predicted class
 *
 */
@ -77,7 +78,8 @@ typedef struct

 uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, 
   const float32_t * in, 
-   float32_t *pBuffer);
+   float32_t *pOutputProbabilities,
+   float32_t *pBufferB);


 #ifdef   __cplusplus
--- a/Include/dsp/bayes_functions_f16.h
+++ b/Include/dsp/bayes_functions_f16.h
@ -60,7 +60,8 @@ typedef struct
 *
 * @param[in]  S                        points to a naive bayes instance structure
 * @param[in]  in                       points to the elements of the input vector.
- * @param[in]  pBuffer   points to a buffer of length numberOfClasses
+ * @param[out] pOutputProbabilities     points to a buffer of length numberOfClasses containing estimated probabilities
+ * @param[out] pBufferB                 points to a temporary buffer of length numberOfClasses
 * @return The predicted class
 *
 */
@ -68,7 +69,8 @@ typedef struct

 uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S, 
   const float16_t * in, 
-   float16_t *pBuffer);
+   float16_t *pOutputProbabilities,
+   float16_t *pBufferB);

 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
--- a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c
+++ b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c
@ -45,11 +45,10 @@
 *
 * @param[in]  *S                       points to a naive bayes instance structure
 * @param[in]  *in                      points to the elements of the input vector.
- * @param[in]  *pBuffer   points to a buffer of length numberOfClasses
+ * @param[out] *pOutputProbabilities    points to a buffer of length numberOfClasses containing estimated probabilities
+ * @param[out] *pBufferB                points to a temporary buffer of length numberOfClasses
 * @return The predicted class
 *
- * @par If the number of classes is big, MVE version will consume lot of
- * stack since the log prior are computed on the stack.
 *
 */

@ -60,19 +59,21 @@

 uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S, 
   const float16_t * in, 
-   float16_t *pBuffer)
+   float16_t *pOutputProbabilities,
+   float16_t *pBufferB
+   )
 {
    uint32_t         nbClass;
    const float16_t *pTheta = S->theta;
    const float16_t *pSigma = S->sigma;
-    float16_t      *buffer = pBuffer;
+    float16_t      *buffer = pOutputProbabilities;
    const float16_t *pIn = in;
    float16_t       result;
    f16x8_t         vsigma;
    _Float16       tmp;
    f16x8_t         vacc1, vacc2;
    uint32_t        index;
-    float16_t       logclassPriors[S->numberOfClasses];
+    float16_t       *logclassPriors=pBufferB;
    float16_t      *pLogPrior = logclassPriors;

    arm_vlog_f16((float16_t *) S->classPriors, logclassPriors, S->numberOfClasses);
@ -135,38 +136,31 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins
        buffer++;
    }

-    arm_max_f16(pBuffer, S->numberOfClasses, &result, &index);
+    arm_max_f16(pOutputProbabilities, S->numberOfClasses, &result, &index);

    return (index);
 }

 #else

-/**
- * @brief Naive Gaussian Bayesian Estimator
- *
- * @param[in]  *S         points to a naive bayes instance structure
- * @param[in]  *in        points to the elements of the input vector.
- * @param[in]  *pBuffer   points to a buffer of length numberOfClasses
- * @return The predicted class
- *
- */
 uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S, 
   const float16_t * in, 
-   float16_t *pBuffer)
+   float16_t *pOutputProbabilities,
+   float16_t *pBufferB)
 {
    uint32_t nbClass;
    uint32_t nbDim;
    const float16_t *pPrior = S->classPriors;
    const float16_t *pTheta = S->theta;
    const float16_t *pSigma = S->sigma;
-    float16_t *buffer = pBuffer;
+    float16_t *buffer = pOutputProbabilities;
    const float16_t *pIn=in;
    float16_t result;
    _Float16 sigma;
    _Float16 tmp;
    _Float16 acc1,acc2;
    uint32_t index;
+    (void)pBufferB;

    pTheta=S->theta;
    pSigma=S->sigma;
@ -199,7 +193,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins
        buffer++;
    }

-    arm_max_f16(pBuffer,S->numberOfClasses,&result,&index);
+    arm_max_f16(pOutputProbabilities,S->numberOfClasses,&result,&index);

    return(index);
 }
--- a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c
+++ b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c
@ -43,11 +43,10 @@
 *
 * @param[in]   *S                      points to a naive bayes instance structure
 * @param[in]   *in                     points to the elements of the input vector.
- * @param[in]  *pBuffer   points to a buffer of length numberOfClasses
+ * @param[out]  *pOutputProbabilities   points to a buffer of length numberOfClasses containing estimated probabilities
+ * @param[out]  *pBufferB               points to a temporary buffer of length numberOfClasses
 * @return The predicted class
 *
- * @par If the number of classes is big, MVE version will consume lot of
- * stack since the log prior are computed on the stack.
 *
 */

@ -58,19 +57,21 @@

 uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, 
   const float32_t * in, 
-   float32_t *pBuffer)
+   float32_t *pOutputProbabilities,
+   float32_t *pBufferB
+   )
 {
    uint32_t         nbClass;
    const float32_t *pTheta = S->theta;
    const float32_t *pSigma = S->sigma;
-    float32_t      *buffer = pBuffer;
+    float32_t      *buffer = pOutputProbabilities;
    const float32_t *pIn = in;
    float32_t       result;
    f32x4_t         vsigma;
    float32_t       tmp;
    f32x4_t         vacc1, vacc2;
    uint32_t        index;
-    float32_t       logclassPriors[S->numberOfClasses];
+    float32_t       *logclassPriors=pBufferB;
    float32_t      *pLogPrior = logclassPriors;

    arm_vlog_f32((float32_t *) S->classPriors, logclassPriors, S->numberOfClasses);
@ -133,7 +134,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
        buffer++;
    }

-    arm_max_f32(pBuffer, S->numberOfClasses, &result, &index);
+    arm_max_f32(pOutputProbabilities, S->numberOfClasses, &result, &index);

    return (index);
 }
@ -148,7 +149,8 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins

 uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, 
   const float32_t * in, 
-   float32_t *pBuffer)
+   float32_t *pOutputProbabilities,
+   float32_t *pBufferB)
 {
    
    const float32_t *pPrior = S->classPriors;
@ -159,7 +161,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
    const float32_t *pTheta1 = S->theta + S->vectorDimension;
    const float32_t *pSigma1 = S->sigma + S->vectorDimension;

-    float32_t *buffer = pBuffer;
+    float32_t *buffer = pOutputProbabilities;
    const float32_t *pIn=in;

    float32_t result;
@ -174,6 +176,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
    float32x2_t tmpV2;
    float32x4_t thetaV,thetaV1;
    float32x4_t inV;
+    (void)pBufferB;

    epsilonV = vdupq_n_f32(S->epsilon);

@ -322,32 +325,24 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
        classBlkCnt--;
    }

-    arm_max_f32(pBuffer,S->numberOfClasses,&result,&index);
+    arm_max_f32(pOutputProbabilities,S->numberOfClasses,&result,&index);

    return(index);
 }

 #else

-/**
- * @brief Naive Gaussian Bayesian Estimator
- *
- * @param[in]  *S         points to a naive bayes instance structure
- * @param[in]  *in        points to the elements of the input vector.
- * @param[in]  *pBuffer   points to a buffer of length numberOfClasses
- * @return The predicted class
- *
- */
 uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, 
   const float32_t * in, 
-   float32_t *pBuffer)
+   float32_t *pOutputProbabilities,
+   float32_t *pBufferB)
 {
    uint32_t nbClass;
    uint32_t nbDim;
    const float32_t *pPrior = S->classPriors;
    const float32_t *pTheta = S->theta;
    const float32_t *pSigma = S->sigma;
-    float32_t *buffer = pBuffer;
+    float32_t *buffer = pOutputProbabilities;
    const float32_t *pIn=in;
    float32_t result;
    float32_t sigma;
@ -355,6 +350,8 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
    float32_t acc1,acc2;
    uint32_t index;

+    (void)pBufferB;
+
    pTheta=S->theta;
    pSigma=S->sigma;

@ -386,7 +383,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
        buffer++;
    }

-    arm_max_f32(pBuffer,S->numberOfClasses,&result,&index);
+    arm_max_f32(pOutputProbabilities,S->numberOfClasses,&result,&index);

    return(index);
 }
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
@ -79,7 +79,7 @@ void arm_cmplx_mult_real_f16(
        float16_t * pCmplxDst,
        uint32_t numSamples)
 {
-    const static uint16_t stride_cmplx_x_real_16[8] = {
+    static const uint16_t stride_cmplx_x_real_16[8] = {
        0, 0, 1, 1, 2, 2, 3, 3
        };
    uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* loop counters */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
@ -77,7 +77,7 @@ void arm_cmplx_mult_real_f32(
        float32_t * pCmplxDst,
        uint32_t numSamples)
 {
-    const static uint32_t stride_cmplx_x_real_32[4] = { 0, 0, 1, 1 };
+    static const uint32_t stride_cmplx_x_real_32[4] = { 0, 0, 1, 1 };

    uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* loop counters */
    uint32_t blkCnt;
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c
@ -57,7 +57,7 @@ void arm_cmplx_mult_real_q15(
        q15_t * pCmplxDst,
        uint32_t numSamples)
 {
-  const static uint16_t stride_cmplx_x_real_16[8] = {
+  static const uint16_t stride_cmplx_x_real_16[8] = {
      0, 0, 1, 1, 2, 2, 3, 3
      };
  q15x8_t rVec;
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c
@ -58,7 +58,7 @@ void arm_cmplx_mult_real_q31(
        uint32_t numSamples)
 {

-    const static uint32_t stride_cmplx_x_real_32[4] = {
+    static const uint32_t stride_cmplx_x_real_32[4] = {
        0, 0, 1, 1
    };
    q31x4_t rVec;
--- a/Source/FilteringFunctions/arm_conv_partial_f32.c
+++ b/Source/FilteringFunctions/arm_conv_partial_f32.c
@ -142,7 +142,7 @@ arm_status arm_conv_partial_f32(
    blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
    blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
    blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t)numPoints) : 0;
    blockSize2 = ((int32_t) check - blockSize3) - (blockSize1 + (int32_t) firstIndex);
    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;

--- a/Source/FilteringFunctions/arm_conv_partial_fast_q31.c
+++ b/Source/FilteringFunctions/arm_conv_partial_fast_q31.c
@ -118,7 +118,7 @@ arm_status arm_conv_partial_fast_q31(
    blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
    blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
    blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  numPoints) : 0;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  (int32_t)numPoints) : 0;
    blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;

--- a/Source/FilteringFunctions/arm_conv_partial_q15.c
+++ b/Source/FilteringFunctions/arm_conv_partial_q15.c
@ -119,7 +119,7 @@ arm_status arm_conv_partial_q15(
    blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
    blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
    blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  numPoints) : 0;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  (int32_t)numPoints) : 0;
    blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;

--- a/Source/FilteringFunctions/arm_conv_partial_q31.c
+++ b/Source/FilteringFunctions/arm_conv_partial_q31.c
@ -121,7 +121,7 @@ arm_status arm_conv_partial_q31(
    blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
    blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
    blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  numPoints) : 0;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 :  (int32_t)numPoints) : 0;
    blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;

--- a/Source/FilteringFunctions/arm_conv_partial_q7.c
+++ b/Source/FilteringFunctions/arm_conv_partial_q7.c
@ -123,7 +123,7 @@ arm_status arm_conv_partial_q7(
    blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
    blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
    blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
-    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t)numPoints) : 0;
    blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;

--- a/Source/FilteringFunctions/arm_fir_f16.c
+++ b/Source/FilteringFunctions/arm_fir_f16.c
@ -59,6 +59,7 @@
            vecAcc0 = vfmaq(vecAcc0, vecIn0, c[i]);                        \
        }

+#define NB_TAPS 4
 __STATIC_INLINE void arm_fir_f16_1_4_mve(const arm_fir_instance_f16 * S, 
    const float16_t * __restrict pSrc, 
    float16_t * __restrict pDst, uint32_t blockSize)
@ -74,7 +75,6 @@ __STATIC_INLINE void arm_fir_f16_1_4_mve(const arm_fir_instance_f16 * S,
    int32_t         blkCnt;
    float16x8_t         vecIn0;
    float16x8_t         vecAcc0;
-    const int       NB_TAPS=4;
    float16_t       c[NB_TAPS];


@ -147,8 +147,9 @@ __STATIC_INLINE void arm_fir_f16_1_4_mve(const arm_fir_instance_f16 * S,
    }

 }
+#undef NB_TAPS

-
+#define NB_TAPS 8
 __STATIC_INLINE void arm_fir_f16_5_8_mve(const arm_fir_instance_f16 * S, 
    const float16_t * __restrict pSrc, 
    float16_t * __restrict pDst, uint32_t blockSize)
@ -164,7 +165,6 @@ __STATIC_INLINE void arm_fir_f16_5_8_mve(const arm_fir_instance_f16 * S,
    int32_t         blkCnt;
    float16x8_t         vecIn0;
    float16x8_t         vecAcc0;
-    const int       NB_TAPS=8;
    float16_t       c[NB_TAPS];


@ -237,7 +237,7 @@ __STATIC_INLINE void arm_fir_f16_5_8_mve(const arm_fir_instance_f16 * S,
    }

 }
-
+#undef NB_TAPS

 void arm_fir_f16(const arm_fir_instance_f16 * S, 
  const float16_t * pSrc, 
--- a/Source/FilteringFunctions/arm_fir_f32.c
+++ b/Source/FilteringFunctions/arm_fir_f32.c
@ -160,6 +160,7 @@
        }


+#define NB_TAPS 4
 __STATIC_INLINE void arm_fir_f32_1_4_mve(const arm_fir_instance_f32 * S, 
  const float32_t * __restrict pSrc, 
  float32_t * __restrict pDst, uint32_t blockSize)
@ -176,7 +177,6 @@ __STATIC_INLINE void arm_fir_f32_1_4_mve(const arm_fir_instance_f32 * S,
    int32_t         blkCnt;
    float32x4_t         vecIn0;
    float32x4_t         vecAcc0;
-    const int       NB_TAPS=4;
    float32_t       c[NB_TAPS];
    const float32_t *pCoeffsCur = pCoeffs;

@ -243,8 +243,7 @@ __STATIC_INLINE void arm_fir_f32_1_4_mve(const arm_fir_instance_f32 * S,
    }
    while (blkCnt > 0);
 }
-
-
+#undef NB_TAPS

 __STATIC_INLINE void arm_fir_f32_5_8_mve(const arm_fir_instance_f32 * S, 
  const float32_t * __restrict pSrc, 
--- a/Source/FilteringFunctions/arm_fir_q15.c
+++ b/Source/FilteringFunctions/arm_fir_q15.c
@ -86,14 +86,13 @@
    uint32_t        numTaps = S->numTaps;   /* Number of filter coefficients in the filter */\
    int32_t         blkCnt;                                                                  \
    q15x8_t         vecIn0;                                                                  \
-    const int32_t   nbVecTaps = (NBTAPS / 8);                                                \
                                                                                             \
    /*                                                                                       \
     * load coefs                                                                            \
     */                                                                                      \
-    q15x8_t         vecCoeffs[nbVecTaps];                                                    \
+    q15x8_t         vecCoeffs[NBVECTAPS];                                                    \
                                                                                             \
-    for (int i = 0; i < nbVecTaps; i++)                                                      \
+    for (int i = 0; i < NBVECTAPS; i++)                                                      \
        vecCoeffs[i] = vldrhq_s16(pCoeffs + 8 * i);                                          \
                                                                                             \
    /*                                                                                       \
@ -114,7 +113,7 @@
        pStateCur += 4;                                                                      \
        pTempSrc += 4;                                                                       \
                                                                                             \
-        FIR_Q15_CORE(pOutput, 4, nbVecTaps, pSamples, vecCoeffs);                            \
+        FIR_Q15_CORE(pOutput, 4, NBVECTAPS, pSamples, vecCoeffs);                            \
        pSamples += 4;                                                                       \
                                                                                             \
        blkCnt--;                                                                            \
@ -126,7 +125,7 @@
    for (int i = 0; i < residual; i++)                                                       \
        *pStateCur++ = *pTempSrc++;                                                          \
                                                                                             \
-    FIR_Q15_CORE(pOutput, residual, nbVecTaps, pSamples, vecCoeffs);                         \
+    FIR_Q15_CORE(pOutput, residual, NBVECTAPS, pSamples, vecCoeffs);                         \
                                                                                             \
    /*                                                                                       \
     * Copy the samples back into the history buffer start                                   \
@ -156,7 +155,9 @@ static void arm_fir_q15_25_32_mve(const arm_fir_instance_q15 * S,
  q15_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 32
+    #define NBVECTAPS (NBTAPS / 8)
    FIR_Q15_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -165,7 +166,9 @@ static void arm_fir_q15_17_24_mve(const arm_fir_instance_q15 * S,
  q15_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 24
+    #define NBVECTAPS (NBTAPS / 8)
    FIR_Q15_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -175,7 +178,9 @@ static void arm_fir_q15_9_16_mve(const arm_fir_instance_q15 * S,
  q15_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 16
+    #define NBVECTAPS (NBTAPS / 8)
    FIR_Q15_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -184,7 +189,9 @@ static void arm_fir_q15_1_8_mve(const arm_fir_instance_q15 * S,
  q15_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 8
+    #define NBVECTAPS (NBTAPS / 8)
    FIR_Q15_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

--- a/Source/FilteringFunctions/arm_fir_q31.c
+++ b/Source/FilteringFunctions/arm_fir_q31.c
@ -117,14 +117,13 @@
    q31_t       *pTempDest;             /* Temporary pointer to the destination buffer */\
    uint32_t     numTaps = S->numTaps;  /* Number of filter coefficients in the filter */\
    int32_t      blkCnt;                                                                 \
-    const int32_t   nbVecTaps = (NBTAPS / 4);                                            \
                                                                                         \
    /*                                                                                   \
     * load coefs                                                                        \
     */                                                                                  \
-    q31x4_t         vecCoeffs[nbVecTaps];                                                \
+    q31x4_t         vecCoeffs[NBVECTAPS];                                                \
                                                                                         \
-    for (int i = 0; i < nbVecTaps; i++)                                                  \
+    for (int i = 0; i < NBVECTAPS; i++)                                                  \
        vecCoeffs[i] = vld1q(pCoeffs + 4 * i);                                           \
                                                                                         \
    /*                                                                                   \
@ -145,7 +144,7 @@
        pStateCur += 4;                                                                  \
        pTempSrc += 4;                                                                   \
                                                                                         \
-        FIR_Q31_CORE(4, nbVecTaps, pSamples, vecCoeffs);                                 \
+        FIR_Q31_CORE(4, NBVECTAPS, pSamples, vecCoeffs);                                 \
                                                                                         \
        pSamples += 4;                                                                   \
        /*                                                                               \
@ -162,7 +161,7 @@
              for (int i = 0; i < residual; i++)                                         \
                  *pStateCur++ = *pTempSrc++;                                            \
                                                                                         \
-              FIR_Q31_CORE(3, nbVecTaps, pSamples, vecCoeffs);                           \
+              FIR_Q31_CORE(3, NBVECTAPS, pSamples, vecCoeffs);                           \
          }                                                                              \
          break;                                                                         \
                                                                                         \
@ -171,7 +170,7 @@
              for (int i = 0; i < residual; i++)                                         \
                  *pStateCur++ = *pTempSrc++;                                            \
                                                                                         \
-               FIR_Q31_CORE(2, nbVecTaps, pSamples, vecCoeffs);                          \
+               FIR_Q31_CORE(2, NBVECTAPS, pSamples, vecCoeffs);                          \
          }                                                                              \
          break;                                                                         \
                                                                                         \
@ -180,7 +179,7 @@
              for (int i = 0; i < residual; i++)                                         \
                  *pStateCur++ = *pTempSrc++;                                            \
                                                                                         \
-              FIR_Q31_CORE(1, nbVecTaps, pSamples, vecCoeffs);                           \
+              FIR_Q31_CORE(1, NBVECTAPS, pSamples, vecCoeffs);                           \
          }                                                                              \
          break;                                                                         \
    }                                                                                    \
@ -382,7 +381,9 @@ static void arm_fir_q31_5_8_mve(const arm_fir_instance_q31 * S,
    q31_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 8
+    #define NBVECTAPS (NBTAPS / 4)
    FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -392,7 +393,9 @@ static void arm_fir_q31_9_12_mve(const arm_fir_instance_q31 * S,
    q31_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 12
+    #define NBVECTAPS (NBTAPS / 4)
    FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -402,7 +405,9 @@ static void arm_fir_q31_13_16_mve(const arm_fir_instance_q31 * S,
    q31_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 16
+    #define NBVECTAPS (NBTAPS / 4)
    FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -412,7 +417,9 @@ static void arm_fir_q31_17_20_mve(const arm_fir_instance_q31 * S,
    q31_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 20
+    #define NBVECTAPS (NBTAPS / 4)
    FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -422,7 +429,9 @@ static void arm_fir_q31_21_24_mve(const arm_fir_instance_q31 * S,
    q31_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 24
+    #define NBVECTAPS (NBTAPS / 4)
    FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -432,7 +441,9 @@ static void arm_fir_q31_25_28_mve(const arm_fir_instance_q31 * S,
    q31_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 28
+    #define NBVECTAPS (NBTAPS / 4)
    FIR_Q31_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

--- a/Source/FilteringFunctions/arm_fir_q7.c
+++ b/Source/FilteringFunctions/arm_fir_q7.c
@ -81,14 +81,13 @@
    uint32_t       numTaps = S->numTaps;   /* Number of filter coefficients in the filter */\
    int32_t        blkCnt;                                                                  \
    q7x16_t        vecIn0;                                                                  \
-    const int32_t  nbVecTaps = (NBTAPS / 16);                                     \
                                                                                            \
    /*                                                                                      \
     * load coefs                                                                           \
     */                                                                                     \
-    q7x16_t         vecCoeffs[nbVecTaps];                                                   \
+    q7x16_t         vecCoeffs[NBVECTAPS];                                                   \
                                                                                            \
-    for (int i = 0; i < nbVecTaps; i++)                                                     \
+    for (int i = 0; i < NBVECTAPS; i++)                                                     \
        vecCoeffs[i] = vldrbq_s8(pCoeffs + 16 * i);                               \
                                                                                            \
    /*                                                                                      \
@ -109,7 +108,7 @@
        pStateCur += 4;                                                                     \
        pTempSrc += 4;                                                                      \
                                                                                            \
-        FIR_Q7_CORE(pOutput, 4, nbVecTaps, pSamples, vecCoeffs);                            \
+        FIR_Q7_CORE(pOutput, 4, NBVECTAPS, pSamples, vecCoeffs);                            \
        pSamples += 4;                                                                      \
                                                                                            \
        blkCnt--;                                                                           \
@ -121,7 +120,7 @@
    for (int i = 0; i < residual; i++)                                                      \
        *pStateCur++ = *pTempSrc++;                                                         \
                                                                                            \
-    FIR_Q7_CORE(pOutput, residual, nbVecTaps, pSamples, vecCoeffs);                         \
+    FIR_Q7_CORE(pOutput, residual, NBVECTAPS, pSamples, vecCoeffs);                         \
                                                                                            \
                                                                                            \
    /*                                                                                      \
@ -147,7 +146,9 @@ static void arm_fir_q7_49_64_mve(const arm_fir_instance_q7 * S,
  q7_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 64
+    #define NBVECTAPS (NBTAPS / 16)
    FIR_Q7_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -157,7 +158,9 @@ void arm_fir_q7_33_48_mve(const arm_fir_instance_q7 * S,
  q7_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 48
+    #define NBVECTAPS (NBTAPS / 16)
    FIR_Q7_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -166,7 +169,9 @@ static void arm_fir_q7_17_32_mve(const arm_fir_instance_q7 * S,
  q7_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 32
+    #define NBVECTAPS (NBTAPS / 16)
    FIR_Q7_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

@ -176,7 +181,9 @@ void arm_fir_q7_1_16_mve(const arm_fir_instance_q7 * S,
  q7_t * __restrict pDst, uint32_t blockSize)
 {
    #define NBTAPS 16
+    #define NBVECTAPS (NBTAPS / 16)
    FIR_Q7_MAIN_CORE();
+    #undef NBVECTAPS
    #undef NBTAPS
 }

--- a/Testing/Include/Benchmarks/BayesF16.h
+++ b/Testing/Include/Benchmarks/BayesF16.h
@ -17,6 +17,7 @@ class BayesF16:public Client::Suite
            Client::Pattern<int16_t> dims;

            Client::LocalPattern<float16_t> outputProbas;
+            Client::LocalPattern<float16_t> temp;
            Client::LocalPattern<int16_t> outputPredicts;

            // Reference patterns are not loaded when we are in dump mode
@ -32,6 +33,6 @@ class BayesF16:public Client::Suite
            arm_gaussian_naive_bayes_instance_f16 bayes;

            const float16_t *inp;
-            float16_t *bufp;
+            float16_t *bufp,*tempp;

    };
--- a/Testing/Include/Benchmarks/BayesF32.h
+++ b/Testing/Include/Benchmarks/BayesF32.h
@ -17,6 +17,7 @@ class BayesF32:public Client::Suite
            Client::Pattern<int16_t> dims;

            Client::LocalPattern<float32_t> outputProbas;
+            Client::LocalPattern<float32_t> temp;
            Client::LocalPattern<int16_t> outputPredicts;

            // Reference patterns are not loaded when we are in dump mode
@ -32,6 +33,6 @@ class BayesF32:public Client::Suite
            arm_gaussian_naive_bayes_instance_f32 bayes;

            const float32_t *inp;
-            float32_t *bufp;
+            float32_t *bufp,*tempp;

    };
--- a/Testing/Include/Tests/BayesF16.h
+++ b/Testing/Include/Tests/BayesF16.h
@ -17,6 +17,7 @@ class BayesF16:public Client::Suite
            Client::Pattern<int16_t> dims;

            Client::LocalPattern<float16_t> outputProbas;
+            Client::LocalPattern<float16_t> temp;
            Client::LocalPattern<int16_t> outputPredicts;

            // Reference patterns are not loaded when we are in dump mode
--- a/Testing/Include/Tests/BayesF32.h
+++ b/Testing/Include/Tests/BayesF32.h
@ -17,6 +17,7 @@ class BayesF32:public Client::Suite
            Client::Pattern<int16_t> dims;

            Client::LocalPattern<float32_t> outputProbas;
+            Client::LocalPattern<float32_t> temp;
            Client::LocalPattern<int16_t> outputPredicts;

            // Reference patterns are not loaded when we are in dump mode
--- a/Testing/Source/Benchmarks/BayesF16.cpp
+++ b/Testing/Source/Benchmarks/BayesF16.cpp
@ -11,7 +11,7 @@

       p = arm_gaussian_naive_bayes_predict_f16(&bayes, 
                inp, 
-                bufp);
+                bufp,tempp);

    } 

@ -51,6 +51,7 @@
            predicts.reload(BayesF16::PREDICTS2_S16_ID,mgr);

            outputProbas.create(this->classNb,BayesF16::OUT_PROBA_F16_ID,mgr);
+            temp.create(this->classNb,BayesF16::OUT_PROBA_F16_ID,mgr);

            bayes.vectorDimension=this->vecDim;
            bayes.numberOfClasses=this->classNb;
@ -62,6 +63,7 @@
            this->inp = input.ptr() + nbi;

            this->bufp = outputProbas.ptr();
+            this->tempp = temp.ptr();

          }
          break;
--- a/Testing/Source/Benchmarks/BayesF32.cpp
+++ b/Testing/Source/Benchmarks/BayesF32.cpp
@ -11,7 +11,7 @@

       p = arm_gaussian_naive_bayes_predict_f32(&bayes, 
                inp, 
-                bufp);
+                bufp,tempp);

    } 

@ -51,6 +51,7 @@
            predicts.reload(BayesF32::PREDICTS2_S16_ID,mgr);

            outputProbas.create(this->classNb,BayesF32::OUT_PROBA_F32_ID,mgr);
+            temp.create(this->classNb,BayesF32::OUT_PROBA_F32_ID,mgr);

            bayes.vectorDimension=this->vecDim;
            bayes.numberOfClasses=this->classNb;
@ -62,6 +63,7 @@
            this->inp = input.ptr() + nbi;

            this->bufp = outputProbas.ptr();
+            this->tempp = temp.ptr();

          }
          break;
--- a/Testing/Source/Tests/BayesF16.cpp
+++ b/Testing/Source/Tests/BayesF16.cpp
@ -10,6 +10,7 @@
       const float16_t *inp = input.ptr();

       float16_t *bufp = outputProbas.ptr();
+       float16_t *tempp = temp.ptr();
       int16_t *p = outputPredicts.ptr();

       
@ -17,7 +18,7 @@
       {
          *p = arm_gaussian_naive_bayes_predict_f16(&bayes, 
                inp, 
-                bufp);
+                bufp,tempp);

          inp += this->vecDim;
          bufp += this->classNb;
@ -62,6 +63,7 @@
            predicts.reload(BayesF16::PREDICTS1_S16_ID,mgr);

            outputProbas.create(this->nbPatterns*this->classNb,BayesF16::OUT_PROBA_F16_ID,mgr);
+            temp.create(this->nbPatterns*this->classNb,BayesF16::OUT_PROBA_F16_ID,mgr);
            outputPredicts.create(this->nbPatterns,BayesF16::OUT_PREDICT_S16_ID,mgr);

            bayes.vectorDimension=this->vecDim;
--- a/Testing/Source/Tests/BayesF32.cpp
+++ b/Testing/Source/Tests/BayesF32.cpp
@ -10,6 +10,7 @@
       const float32_t *inp = input.ptr();

       float32_t *bufp = outputProbas.ptr();
+       float32_t *tempp = temp.ptr();
       int16_t *p = outputPredicts.ptr();

       
@ -17,7 +18,7 @@
       {
          *p = arm_gaussian_naive_bayes_predict_f32(&bayes, 
                inp, 
-                bufp);
+                bufp,tempp);

          inp += this->vecDim;
          bufp += this->classNb;
@ -62,6 +63,8 @@
            predicts.reload(BayesF32::PREDICTS1_S16_ID,mgr);

            outputProbas.create(this->nbPatterns*this->classNb,BayesF32::OUT_PROBA_F32_ID,mgr);
+            temp.create(this->nbPatterns*this->classNb,BayesF32::OUT_PROBA_F32_ID,mgr);
+
            outputPredicts.create(this->nbPatterns,BayesF32::OUT_PREDICT_S16_ID,mgr);

            bayes.vectorDimension=this->vecDim;