From 4014866174bdd60aa0880dc05e7f1e6b3fa760ee Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Thu, 29 Apr 2021 10:41:12 +0200 Subject: [PATCH] CMSIS-DSP: Remove some gcc compilation warnings. --- .../arm_bayes_example/arm_bayes_example_f32.c | 7 +-- Include/dsp/bayes_functions.h | 10 +++-- Include/dsp/bayes_functions_f16.h | 10 +++-- .../arm_gaussian_naive_bayes_predict_f16.c | 36 +++++++-------- .../arm_gaussian_naive_bayes_predict_f32.c | 45 +++++++++---------- .../arm_cmplx_mult_real_f16.c | 2 +- .../arm_cmplx_mult_real_f32.c | 2 +- .../arm_cmplx_mult_real_q15.c | 2 +- .../arm_cmplx_mult_real_q31.c | 2 +- .../FilteringFunctions/arm_conv_partial_f32.c | 2 +- .../arm_conv_partial_fast_q31.c | 2 +- .../FilteringFunctions/arm_conv_partial_q15.c | 2 +- .../FilteringFunctions/arm_conv_partial_q31.c | 2 +- .../FilteringFunctions/arm_conv_partial_q7.c | 2 +- Source/FilteringFunctions/arm_fir_f16.c | 8 ++-- Source/FilteringFunctions/arm_fir_f32.c | 5 +-- Source/FilteringFunctions/arm_fir_q15.c | 17 ++++--- Source/FilteringFunctions/arm_fir_q31.c | 25 ++++++++--- Source/FilteringFunctions/arm_fir_q7.c | 17 ++++--- Testing/Include/Benchmarks/BayesF16.h | 3 +- Testing/Include/Benchmarks/BayesF32.h | 3 +- Testing/Include/Tests/BayesF16.h | 1 + Testing/Include/Tests/BayesF32.h | 1 + Testing/Source/Benchmarks/BayesF16.cpp | 4 +- Testing/Source/Benchmarks/BayesF32.cpp | 4 +- Testing/Source/Tests/BayesF16.cpp | 4 +- Testing/Source/Tests/BayesF32.cpp | 5 ++- 27 files changed, 128 insertions(+), 95 deletions(-) diff --git a/Examples/ARM/arm_bayes_example/arm_bayes_example_f32.c b/Examples/ARM/arm_bayes_example/arm_bayes_example_f32.c index b0cc13a1..1be97d60 100755 --- a/Examples/ARM/arm_bayes_example/arm_bayes_example_f32.c +++ b/Examples/ARM/arm_bayes_example/arm_bayes_example_f32.c @@ -92,6 +92,7 @@ int32_t main(void) /* Result of the classifier */ float32_t result[NB_OF_CLASSES]; + float32_t temp[NB_OF_CLASSES]; float32_t maxProba; uint32_t index; @@ -105,7 +106,7 @@ 
int32_t main(void) in[0] = 1.5f; in[1] = 1.0f; - index = arm_gaussian_naive_bayes_predict_f32(&S, in, result); + index = arm_gaussian_naive_bayes_predict_f32(&S, in, result,temp); maxProba = result[index]; @@ -116,7 +117,7 @@ int32_t main(void) in[0] = -1.5f; in[1] = 1.0f; - index = arm_gaussian_naive_bayes_predict_f32(&S, in, result); + index = arm_gaussian_naive_bayes_predict_f32(&S, in, result,temp); maxProba = result[index]; @@ -127,7 +128,7 @@ int32_t main(void) in[0] = 0.0f; in[1] = -3.0f; - index = arm_gaussian_naive_bayes_predict_f32(&S, in, result); + index = arm_gaussian_naive_bayes_predict_f32(&S, in, result,temp); maxProba = result[index]; diff --git a/Include/dsp/bayes_functions.h b/Include/dsp/bayes_functions.h index bcbfc363..beca38ec 100755 --- a/Include/dsp/bayes_functions.h +++ b/Include/dsp/bayes_functions.h @@ -67,9 +67,10 @@ typedef struct /** * @brief Naive Gaussian Bayesian Estimator * - * @param[in] S points to a naive bayes instance structure - * @param[in] in points to the elements of the input vector. - * @param[in] pBuffer points to a buffer of length numberOfClasses + * @param[in] S points to a naive bayes instance structure + * @param[in] in points to the elements of the input vector. 
+ * @param[out] pOutputProbabilities points to a buffer of length numberOfClasses containing estimated probabilities + * @param[out] pBufferB points to a temporary buffer of length numberOfClasses * @return The predicted class * */ @@ -77,7 +78,8 @@ typedef struct uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, const float32_t * in, - float32_t *pBuffer); + float32_t *pOutputProbabilities, + float32_t *pBufferB); #ifdef __cplusplus diff --git a/Include/dsp/bayes_functions_f16.h b/Include/dsp/bayes_functions_f16.h index 506d76ef..f2c9ad82 100755 --- a/Include/dsp/bayes_functions_f16.h +++ b/Include/dsp/bayes_functions_f16.h @@ -58,9 +58,10 @@ typedef struct /** * @brief Naive Gaussian Bayesian Estimator * - * @param[in] S points to a naive bayes instance structure - * @param[in] in points to the elements of the input vector. - * @param[in] pBuffer points to a buffer of length numberOfClasses + * @param[in] S points to a naive bayes instance structure + * @param[in] in points to the elements of the input vector.
+ * @param[out] pOutputProbabilities points to a buffer of length numberOfClasses containing estimated probabilities + * @param[out] pBufferB points to a temporary buffer of length numberOfClasses * @return The predicted class * */ @@ -68,7 +69,8 @@ typedef struct uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S, const float16_t * in, - float16_t *pBuffer); + float16_t *pOutputProbabilities, + float16_t *pBufferB); #endif /*defined(ARM_FLOAT16_SUPPORTED)*/ #ifdef __cplusplus diff --git a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c index 74ef0fc9..b918f704 100755 --- a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c +++ b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c @@ -43,13 +43,12 @@ /** * @brief Naive Gaussian Bayesian Estimator * - * @param[in] *S points to a naive bayes instance structure - * @param[in] *in points to the elements of the input vector. - * @param[in] *pBuffer points to a buffer of length numberOfClasses + * @param[in] *S points to a naive bayes instance structure + * @param[in] *in points to the elements of the input vector. + * @param[out] *pOutputProbabilities points to a buffer of length numberOfClasses containing estimated probabilities + * @param[out] *pBufferB points to a temporary buffer of length numberOfClasses * @return The predicted class * - * @par If the number of classes is big, MVE version will consume lot of - * stack since the log prior are computed on the stack.
* */ @@ -60,19 +59,21 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S, const float16_t * in, - float16_t *pBuffer) + float16_t *pOutputProbabilities, + float16_t *pBufferB + ) { uint32_t nbClass; const float16_t *pTheta = S->theta; const float16_t *pSigma = S->sigma; - float16_t *buffer = pBuffer; + float16_t *buffer = pOutputProbabilities; const float16_t *pIn = in; float16_t result; f16x8_t vsigma; _Float16 tmp; f16x8_t vacc1, vacc2; uint32_t index; - float16_t logclassPriors[S->numberOfClasses]; + float16_t *logclassPriors=pBufferB; float16_t *pLogPrior = logclassPriors; arm_vlog_f16((float16_t *) S->classPriors, logclassPriors, S->numberOfClasses); @@ -135,38 +136,31 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins buffer++; } - arm_max_f16(pBuffer, S->numberOfClasses, &result, &index); + arm_max_f16(pOutputProbabilities, S->numberOfClasses, &result, &index); return (index); } #else -/** - * @brief Naive Gaussian Bayesian Estimator - * - * @param[in] *S points to a naive bayes instance structure - * @param[in] *in points to the elements of the input vector. 
- * @param[in] *pBuffer points to a buffer of length numberOfClasses - * @return The predicted class - * - */ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S, const float16_t * in, - float16_t *pBuffer) + float16_t *pOutputProbabilities, + float16_t *pBufferB) { uint32_t nbClass; uint32_t nbDim; const float16_t *pPrior = S->classPriors; const float16_t *pTheta = S->theta; const float16_t *pSigma = S->sigma; - float16_t *buffer = pBuffer; + float16_t *buffer = pOutputProbabilities; const float16_t *pIn=in; float16_t result; _Float16 sigma; _Float16 tmp; _Float16 acc1,acc2; uint32_t index; + (void)pBufferB; pTheta=S->theta; pSigma=S->sigma; @@ -199,7 +193,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins buffer++; } - arm_max_f16(pBuffer,S->numberOfClasses,&result,&index); + arm_max_f16(pOutputProbabilities,S->numberOfClasses,&result,&index); return(index); } diff --git a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c index f6873658..56331ff5 100755 --- a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c +++ b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c @@ -41,13 +41,12 @@ /** * @brief Naive Gaussian Bayesian Estimator * - * @param[in] *S points to a naive bayes instance structure - * @param[in] *in points to the elements of the input vector. - * @param[in] *pBuffer points to a buffer of length numberOfClasses + * @param[in] *S points to a naive bayes instance structure + * @param[in] *in points to the elements of the input vector. 
+ * @param[out] *pOutputProbabilities points to a buffer of length numberOfClasses containing estimated probabilities + * @param[out] *pBufferB points to a temporary buffer of length numberOfClasses * @return The predicted class * - * @par If the number of classes is big, MVE version will consume lot of - * stack since the log prior are computed on the stack. * */ @@ -58,19 +57,21 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, const float32_t * in, - float32_t *pBuffer) + float32_t *pOutputProbabilities, + float32_t *pBufferB + ) { uint32_t nbClass; const float32_t *pTheta = S->theta; const float32_t *pSigma = S->sigma; - float32_t *buffer = pBuffer; + float32_t *buffer = pOutputProbabilities; const float32_t *pIn = in; float32_t result; f32x4_t vsigma; float32_t tmp; f32x4_t vacc1, vacc2; uint32_t index; - float32_t logclassPriors[S->numberOfClasses]; + float32_t *logclassPriors=pBufferB; float32_t *pLogPrior = logclassPriors; arm_vlog_f32((float32_t *) S->classPriors, logclassPriors, S->numberOfClasses); @@ -133,7 +134,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins buffer++; } - arm_max_f32(pBuffer, S->numberOfClasses, &result, &index); + arm_max_f32(pOutputProbabilities, S->numberOfClasses, &result, &index); return (index); } @@ -148,7 +149,8 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, const float32_t * in, - float32_t *pBuffer) + float32_t *pOutputProbabilities, + float32_t *pBufferB) { const float32_t *pPrior = S->classPriors; @@ -159,7 +161,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins const float32_t *pTheta1 = S->theta + S->vectorDimension; const float32_t *pSigma1 = S->sigma + S->vectorDimension; - float32_t *buffer = pBuffer; + float32_t *buffer = pOutputProbabilities; const float32_t *pIn=in; 
float32_t result; @@ -174,6 +176,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins float32x2_t tmpV2; float32x4_t thetaV,thetaV1; float32x4_t inV; + (void)pBufferB; epsilonV = vdupq_n_f32(S->epsilon); @@ -322,32 +325,24 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins classBlkCnt--; } - arm_max_f32(pBuffer,S->numberOfClasses,&result,&index); + arm_max_f32(pOutputProbabilities,S->numberOfClasses,&result,&index); return(index); } #else -/** - * @brief Naive Gaussian Bayesian Estimator - * - * @param[in] *S points to a naive bayes instance structure - * @param[in] *in points to the elements of the input vector. - * @param[in] *pBuffer points to a buffer of length numberOfClasses - * @return The predicted class - * - */ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_instance_f32 *S, const float32_t * in, - float32_t *pBuffer) + float32_t *pOutputProbabilities, + float32_t *pBufferB) { uint32_t nbClass; uint32_t nbDim; const float32_t *pPrior = S->classPriors; const float32_t *pTheta = S->theta; const float32_t *pSigma = S->sigma; - float32_t *buffer = pBuffer; + float32_t *buffer = pOutputProbabilities; const float32_t *pIn=in; float32_t result; float32_t sigma; @@ -355,6 +350,8 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins float32_t acc1,acc2; uint32_t index; + (void)pBufferB; + pTheta=S->theta; pSigma=S->sigma; @@ -386,7 +383,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins buffer++; } - arm_max_f32(pBuffer,S->numberOfClasses,&result,&index); + arm_max_f32(pOutputProbabilities,S->numberOfClasses,&result,&index); return(index); } diff --git a/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c b/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c index 3d2711b3..248858bc 100755 --- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c +++ 
b/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c @@ -79,7 +79,7 @@ void arm_cmplx_mult_real_f16( float16_t * pCmplxDst, uint32_t numSamples) { - const static uint16_t stride_cmplx_x_real_16[8] = { + static const uint16_t stride_cmplx_x_real_16[8] = { 0, 0, 1, 1, 2, 2, 3, 3 }; uint32_t blockSizeC = numSamples * CMPLX_DIM; /* loop counters */ diff --git a/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c b/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c index c301f700..bc0f59b2 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c @@ -77,7 +77,7 @@ void arm_cmplx_mult_real_f32( float32_t * pCmplxDst, uint32_t numSamples) { - const static uint32_t stride_cmplx_x_real_32[4] = { 0, 0, 1, 1 }; + static const uint32_t stride_cmplx_x_real_32[4] = { 0, 0, 1, 1 }; uint32_t blockSizeC = numSamples * CMPLX_DIM; /* loop counters */ uint32_t blkCnt; diff --git a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c index fb596923..a43383b7 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c @@ -57,7 +57,7 @@ void arm_cmplx_mult_real_q15( q15_t * pCmplxDst, uint32_t numSamples) { - const static uint16_t stride_cmplx_x_real_16[8] = { + static const uint16_t stride_cmplx_x_real_16[8] = { 0, 0, 1, 1, 2, 2, 3, 3 }; q15x8_t rVec; diff --git a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c index 3741e496..de13757d 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c @@ -58,7 +58,7 @@ void arm_cmplx_mult_real_q31( uint32_t numSamples) { - const static uint32_t stride_cmplx_x_real_32[4] = { + static const uint32_t stride_cmplx_x_real_32[4] = { 0, 0, 1, 1 }; q31x4_t rVec; diff --git a/Source/FilteringFunctions/arm_conv_partial_f32.c 
b/Source/FilteringFunctions/arm_conv_partial_f32.c index 6309cc85..a8a9bd1a 100644 --- a/Source/FilteringFunctions/arm_conv_partial_f32.c +++ b/Source/FilteringFunctions/arm_conv_partial_f32.c @@ -142,7 +142,7 @@ arm_status arm_conv_partial_f32( blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0; blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3; blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex; - blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0; + blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t)numPoints) : 0; blockSize2 = ((int32_t) check - blockSize3) - (blockSize1 + (int32_t) firstIndex); blockSize2 = (blockSize2 > 0) ? blockSize2 : 0; diff --git a/Source/FilteringFunctions/arm_conv_partial_fast_q31.c b/Source/FilteringFunctions/arm_conv_partial_fast_q31.c index c8b14770..65269bc9 100644 --- a/Source/FilteringFunctions/arm_conv_partial_fast_q31.c +++ b/Source/FilteringFunctions/arm_conv_partial_fast_q31.c @@ -118,7 +118,7 @@ arm_status arm_conv_partial_fast_q31( blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0; blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3; blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex; - blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0; + blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t)numPoints) : 0; blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex); blockSize2 = (blockSize2 > 0) ? 
blockSize2 : 0; diff --git a/Source/FilteringFunctions/arm_conv_partial_q15.c b/Source/FilteringFunctions/arm_conv_partial_q15.c index 335dd145..cfab5168 100644 --- a/Source/FilteringFunctions/arm_conv_partial_q15.c +++ b/Source/FilteringFunctions/arm_conv_partial_q15.c @@ -119,7 +119,7 @@ arm_status arm_conv_partial_q15( blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0; blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3; blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex; - blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0; + blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t)numPoints) : 0; blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex); blockSize2 = (blockSize2 > 0) ? blockSize2 : 0; diff --git a/Source/FilteringFunctions/arm_conv_partial_q31.c b/Source/FilteringFunctions/arm_conv_partial_q31.c index 07343c5c..bcd52983 100644 --- a/Source/FilteringFunctions/arm_conv_partial_q31.c +++ b/Source/FilteringFunctions/arm_conv_partial_q31.c @@ -121,7 +121,7 @@ arm_status arm_conv_partial_q31( blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0; blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3; blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex; - blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0; + blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t)numPoints) : 0; blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex); blockSize2 = (blockSize2 > 0) ? 
blockSize2 : 0; diff --git a/Source/FilteringFunctions/arm_conv_partial_q7.c b/Source/FilteringFunctions/arm_conv_partial_q7.c index b9bac123..116a8e69 100644 --- a/Source/FilteringFunctions/arm_conv_partial_q7.c +++ b/Source/FilteringFunctions/arm_conv_partial_q7.c @@ -123,7 +123,7 @@ arm_status arm_conv_partial_q7( blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0; blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3; blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex; - blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0; + blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t)numPoints) : 0; blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex); blockSize2 = (blockSize2 > 0) ? blockSize2 : 0; diff --git a/Source/FilteringFunctions/arm_fir_f16.c b/Source/FilteringFunctions/arm_fir_f16.c index 7d90e8f1..ad198dcf 100755 --- a/Source/FilteringFunctions/arm_fir_f16.c +++ b/Source/FilteringFunctions/arm_fir_f16.c @@ -59,6 +59,7 @@ vecAcc0 = vfmaq(vecAcc0, vecIn0, c[i]); \ } +#define NB_TAPS 4 __STATIC_INLINE void arm_fir_f16_1_4_mve(const arm_fir_instance_f16 * S, const float16_t * __restrict pSrc, float16_t * __restrict pDst, uint32_t blockSize) @@ -74,7 +75,6 @@ __STATIC_INLINE void arm_fir_f16_1_4_mve(const arm_fir_instance_f16 * S, int32_t blkCnt; float16x8_t vecIn0; float16x8_t vecAcc0; - const int NB_TAPS=4; float16_t c[NB_TAPS]; @@ -147,8 +147,9 @@ __STATIC_INLINE void arm_fir_f16_1_4_mve(const arm_fir_instance_f16 * S, } } +#undef NB_TAPS - +#define NB_TAPS 8 __STATIC_INLINE void arm_fir_f16_5_8_mve(const arm_fir_instance_f16 * S, const float16_t * __restrict pSrc, float16_t * __restrict pDst, uint32_t blockSize) @@ -164,7 +165,6 @@ __STATIC_INLINE void arm_fir_f16_5_8_mve(const arm_fir_instance_f16 * S, int32_t blkCnt; float16x8_t vecIn0; float16x8_t 
vecAcc0; - const int NB_TAPS=8; float16_t c[NB_TAPS]; @@ -237,7 +237,7 @@ __STATIC_INLINE void arm_fir_f16_5_8_mve(const arm_fir_instance_f16 * S, } } - +#undef NB_TAPS void arm_fir_f16(const arm_fir_instance_f16 * S, const float16_t * pSrc, diff --git a/Source/FilteringFunctions/arm_fir_f32.c b/Source/FilteringFunctions/arm_fir_f32.c index b1c01818..f8c1d806 100644 --- a/Source/FilteringFunctions/arm_fir_f32.c +++ b/Source/FilteringFunctions/arm_fir_f32.c @@ -160,6 +160,7 @@ } +#define NB_TAPS 4 __STATIC_INLINE void arm_fir_f32_1_4_mve(const arm_fir_instance_f32 * S, const float32_t * __restrict pSrc, float32_t * __restrict pDst, uint32_t blockSize) @@ -176,7 +177,6 @@ __STATIC_INLINE void arm_fir_f32_1_4_mve(const arm_fir_instance_f32 * S, int32_t blkCnt; float32x4_t vecIn0; float32x4_t vecAcc0; - const int NB_TAPS=4; float32_t c[NB_TAPS]; const float32_t *pCoeffsCur = pCoeffs; @@ -243,8 +243,7 @@ __STATIC_INLINE void arm_fir_f32_1_4_mve(const arm_fir_instance_f32 * S, } while (blkCnt > 0); } - - +#undef NB_TAPS __STATIC_INLINE void arm_fir_f32_5_8_mve(const arm_fir_instance_f32 * S, const float32_t * __restrict pSrc, diff --git a/Source/FilteringFunctions/arm_fir_q15.c b/Source/FilteringFunctions/arm_fir_q15.c index 2880d9ee..f00959b8 100644 --- a/Source/FilteringFunctions/arm_fir_q15.c +++ b/Source/FilteringFunctions/arm_fir_q15.c @@ -86,14 +86,13 @@ uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */\ int32_t blkCnt; \ q15x8_t vecIn0; \ - const int32_t nbVecTaps = (NBTAPS / 8); \ \ /* \ * load coefs \ */ \ - q15x8_t vecCoeffs[nbVecTaps]; \ + q15x8_t vecCoeffs[NBVECTAPS]; \ \ - for (int i = 0; i < nbVecTaps; i++) \ + for (int i = 0; i < NBVECTAPS; i++) \ vecCoeffs[i] = vldrhq_s16(pCoeffs + 8 * i); \ \ /* \ @@ -114,7 +113,7 @@ pStateCur += 4; \ pTempSrc += 4; \ \ - FIR_Q15_CORE(pOutput, 4, nbVecTaps, pSamples, vecCoeffs); \ + FIR_Q15_CORE(pOutput, 4, NBVECTAPS, pSamples, vecCoeffs); \ pSamples += 4; \ \ blkCnt--; \ @@ -126,7 +125,7 
@@ for (int i = 0; i < residual; i++) \ *pStateCur++ = *pTempSrc++; \ \ - FIR_Q15_CORE(pOutput, residual, nbVecTaps, pSamples, vecCoeffs); \ + FIR_Q15_CORE(pOutput, residual, NBVECTAPS, pSamples, vecCoeffs); \ \ /* \ * Copy the samples back into the history buffer start \ @@ -156,7 +155,9 @@ static void arm_fir_q15_25_32_mve(const arm_fir_instance_q15 * S, q15_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 32 + #define NBVECTAPS (NBTAPS / 8) FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -165,7 +166,9 @@ static void arm_fir_q15_17_24_mve(const arm_fir_instance_q15 * S, q15_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 24 + #define NBVECTAPS (NBTAPS / 8) FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -175,7 +178,9 @@ static void arm_fir_q15_9_16_mve(const arm_fir_instance_q15 * S, q15_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 16 + #define NBVECTAPS (NBTAPS / 8) FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -184,7 +189,9 @@ static void arm_fir_q15_1_8_mve(const arm_fir_instance_q15 * S, q15_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 8 + #define NBVECTAPS (NBTAPS / 8) FIR_Q15_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } diff --git a/Source/FilteringFunctions/arm_fir_q31.c b/Source/FilteringFunctions/arm_fir_q31.c index b64ee7f4..40fe5270 100644 --- a/Source/FilteringFunctions/arm_fir_q31.c +++ b/Source/FilteringFunctions/arm_fir_q31.c @@ -117,14 +117,13 @@ q31_t *pTempDest; /* Temporary pointer to the destination buffer */\ uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */\ int32_t blkCnt; \ - const int32_t nbVecTaps = (NBTAPS / 4); \ \ /* \ * load coefs \ */ \ - q31x4_t vecCoeffs[nbVecTaps]; \ + q31x4_t vecCoeffs[NBVECTAPS]; \ \ - for (int i = 0; i < nbVecTaps; i++) \ + for (int i = 0; i < NBVECTAPS; i++) \ vecCoeffs[i] = vld1q(pCoeffs + 4 * i); \ \ /* \ @@ -145,7 +144,7 @@ pStateCur += 4; \ pTempSrc += 4; \ \ - FIR_Q31_CORE(4, nbVecTaps, 
pSamples, vecCoeffs); \ + FIR_Q31_CORE(4, NBVECTAPS, pSamples, vecCoeffs); \ \ pSamples += 4; \ /* \ @@ -162,7 +161,7 @@ for (int i = 0; i < residual; i++) \ *pStateCur++ = *pTempSrc++; \ \ - FIR_Q31_CORE(3, nbVecTaps, pSamples, vecCoeffs); \ + FIR_Q31_CORE(3, NBVECTAPS, pSamples, vecCoeffs); \ } \ break; \ \ @@ -171,7 +170,7 @@ for (int i = 0; i < residual; i++) \ *pStateCur++ = *pTempSrc++; \ \ - FIR_Q31_CORE(2, nbVecTaps, pSamples, vecCoeffs); \ + FIR_Q31_CORE(2, NBVECTAPS, pSamples, vecCoeffs); \ } \ break; \ \ @@ -180,7 +179,7 @@ for (int i = 0; i < residual; i++) \ *pStateCur++ = *pTempSrc++; \ \ - FIR_Q31_CORE(1, nbVecTaps, pSamples, vecCoeffs); \ + FIR_Q31_CORE(1, NBVECTAPS, pSamples, vecCoeffs); \ } \ break; \ } \ @@ -382,7 +381,9 @@ static void arm_fir_q31_5_8_mve(const arm_fir_instance_q31 * S, q31_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 8 + #define NBVECTAPS (NBTAPS / 4) FIR_Q31_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -392,7 +393,9 @@ static void arm_fir_q31_9_12_mve(const arm_fir_instance_q31 * S, q31_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 12 + #define NBVECTAPS (NBTAPS / 4) FIR_Q31_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -402,7 +405,9 @@ static void arm_fir_q31_13_16_mve(const arm_fir_instance_q31 * S, q31_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 16 + #define NBVECTAPS (NBTAPS / 4) FIR_Q31_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -412,7 +417,9 @@ static void arm_fir_q31_17_20_mve(const arm_fir_instance_q31 * S, q31_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 20 + #define NBVECTAPS (NBTAPS / 4) FIR_Q31_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -422,7 +429,9 @@ static void arm_fir_q31_21_24_mve(const arm_fir_instance_q31 * S, q31_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 24 + #define NBVECTAPS (NBTAPS / 4) FIR_Q31_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -432,7 +441,9 @@ static void arm_fir_q31_25_28_mve(const 
arm_fir_instance_q31 * S, q31_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 28 + #define NBVECTAPS (NBTAPS / 4) FIR_Q31_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } diff --git a/Source/FilteringFunctions/arm_fir_q7.c b/Source/FilteringFunctions/arm_fir_q7.c index e2972cdb..c05fa321 100755 --- a/Source/FilteringFunctions/arm_fir_q7.c +++ b/Source/FilteringFunctions/arm_fir_q7.c @@ -81,14 +81,13 @@ uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */\ int32_t blkCnt; \ q7x16_t vecIn0; \ - const int32_t nbVecTaps = (NBTAPS / 16); \ \ /* \ * load coefs \ */ \ - q7x16_t vecCoeffs[nbVecTaps]; \ + q7x16_t vecCoeffs[NBVECTAPS]; \ \ - for (int i = 0; i < nbVecTaps; i++) \ + for (int i = 0; i < NBVECTAPS; i++) \ vecCoeffs[i] = vldrbq_s8(pCoeffs + 16 * i); \ \ /* \ @@ -109,7 +108,7 @@ pStateCur += 4; \ pTempSrc += 4; \ \ - FIR_Q7_CORE(pOutput, 4, nbVecTaps, pSamples, vecCoeffs); \ + FIR_Q7_CORE(pOutput, 4, NBVECTAPS, pSamples, vecCoeffs); \ pSamples += 4; \ \ blkCnt--; \ @@ -121,7 +120,7 @@ for (int i = 0; i < residual; i++) \ *pStateCur++ = *pTempSrc++; \ \ - FIR_Q7_CORE(pOutput, residual, nbVecTaps, pSamples, vecCoeffs); \ + FIR_Q7_CORE(pOutput, residual, NBVECTAPS, pSamples, vecCoeffs); \ \ \ /* \ @@ -147,7 +146,9 @@ static void arm_fir_q7_49_64_mve(const arm_fir_instance_q7 * S, q7_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 64 + #define NBVECTAPS (NBTAPS / 16) FIR_Q7_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -157,7 +158,9 @@ void arm_fir_q7_33_48_mve(const arm_fir_instance_q7 * S, q7_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 48 + #define NBVECTAPS (NBTAPS / 16) FIR_Q7_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -166,7 +169,9 @@ static void arm_fir_q7_17_32_mve(const arm_fir_instance_q7 * S, q7_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 32 + #define NBVECTAPS (NBTAPS / 16) FIR_Q7_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } @@ -176,7 +181,9 @@ void 
arm_fir_q7_1_16_mve(const arm_fir_instance_q7 * S, q7_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 16 + #define NBVECTAPS (NBTAPS / 16) FIR_Q7_MAIN_CORE(); + #undef NBVECTAPS #undef NBTAPS } diff --git a/Testing/Include/Benchmarks/BayesF16.h b/Testing/Include/Benchmarks/BayesF16.h index 37924f14..59cc855a 100755 --- a/Testing/Include/Benchmarks/BayesF16.h +++ b/Testing/Include/Benchmarks/BayesF16.h @@ -17,6 +17,7 @@ class BayesF16:public Client::Suite Client::Pattern dims; Client::LocalPattern outputProbas; + Client::LocalPattern temp; Client::LocalPattern outputPredicts; // Reference patterns are not loaded when we are in dump mode @@ -32,6 +33,6 @@ class BayesF16:public Client::Suite arm_gaussian_naive_bayes_instance_f16 bayes; const float16_t *inp; - float16_t *bufp; + float16_t *bufp,*tempp; }; diff --git a/Testing/Include/Benchmarks/BayesF32.h b/Testing/Include/Benchmarks/BayesF32.h index d1f77acf..955f16d0 100755 --- a/Testing/Include/Benchmarks/BayesF32.h +++ b/Testing/Include/Benchmarks/BayesF32.h @@ -17,6 +17,7 @@ class BayesF32:public Client::Suite Client::Pattern dims; Client::LocalPattern outputProbas; + Client::LocalPattern temp; Client::LocalPattern outputPredicts; // Reference patterns are not loaded when we are in dump mode @@ -32,6 +33,6 @@ class BayesF32:public Client::Suite arm_gaussian_naive_bayes_instance_f32 bayes; const float32_t *inp; - float32_t *bufp; + float32_t *bufp,*tempp; }; diff --git a/Testing/Include/Tests/BayesF16.h b/Testing/Include/Tests/BayesF16.h index 34869785..ff67b4db 100755 --- a/Testing/Include/Tests/BayesF16.h +++ b/Testing/Include/Tests/BayesF16.h @@ -17,6 +17,7 @@ class BayesF16:public Client::Suite Client::Pattern dims; Client::LocalPattern outputProbas; + Client::LocalPattern temp; Client::LocalPattern outputPredicts; // Reference patterns are not loaded when we are in dump mode diff --git a/Testing/Include/Tests/BayesF32.h b/Testing/Include/Tests/BayesF32.h index f839dd95..ef60dee1 100755 --- 
a/Testing/Include/Tests/BayesF32.h +++ b/Testing/Include/Tests/BayesF32.h @@ -17,6 +17,7 @@ class BayesF32:public Client::Suite Client::Pattern dims; Client::LocalPattern outputProbas; + Client::LocalPattern temp; Client::LocalPattern outputPredicts; // Reference patterns are not loaded when we are in dump mode diff --git a/Testing/Source/Benchmarks/BayesF16.cpp b/Testing/Source/Benchmarks/BayesF16.cpp index eabf6a86..f3a24530 100755 --- a/Testing/Source/Benchmarks/BayesF16.cpp +++ b/Testing/Source/Benchmarks/BayesF16.cpp @@ -11,7 +11,7 @@ p = arm_gaussian_naive_bayes_predict_f16(&bayes, inp, - bufp); + bufp,tempp); } @@ -51,6 +51,7 @@ predicts.reload(BayesF16::PREDICTS2_S16_ID,mgr); outputProbas.create(this->classNb,BayesF16::OUT_PROBA_F16_ID,mgr); + temp.create(this->classNb,BayesF16::OUT_PROBA_F16_ID,mgr); bayes.vectorDimension=this->vecDim; bayes.numberOfClasses=this->classNb; @@ -62,6 +63,7 @@ this->inp = input.ptr() + nbi; this->bufp = outputProbas.ptr(); + this->tempp = temp.ptr(); } break; diff --git a/Testing/Source/Benchmarks/BayesF32.cpp b/Testing/Source/Benchmarks/BayesF32.cpp index 406e72bc..2f0753c4 100755 --- a/Testing/Source/Benchmarks/BayesF32.cpp +++ b/Testing/Source/Benchmarks/BayesF32.cpp @@ -11,7 +11,7 @@ p = arm_gaussian_naive_bayes_predict_f32(&bayes, inp, - bufp); + bufp,tempp); } @@ -51,6 +51,7 @@ predicts.reload(BayesF32::PREDICTS2_S16_ID,mgr); outputProbas.create(this->classNb,BayesF32::OUT_PROBA_F32_ID,mgr); + temp.create(this->classNb,BayesF32::OUT_PROBA_F32_ID,mgr); bayes.vectorDimension=this->vecDim; bayes.numberOfClasses=this->classNb; @@ -62,6 +63,7 @@ this->inp = input.ptr() + nbi; this->bufp = outputProbas.ptr(); + this->tempp = temp.ptr(); } break; diff --git a/Testing/Source/Tests/BayesF16.cpp b/Testing/Source/Tests/BayesF16.cpp index b2d8ed54..a544dfc9 100755 --- a/Testing/Source/Tests/BayesF16.cpp +++ b/Testing/Source/Tests/BayesF16.cpp @@ -10,6 +10,7 @@ const float16_t *inp = input.ptr(); float16_t *bufp = outputProbas.ptr(); 
+ float16_t *tempp = temp.ptr(); int16_t *p = outputPredicts.ptr(); @@ -17,7 +18,7 @@ { *p = arm_gaussian_naive_bayes_predict_f16(&bayes, inp, - bufp); + bufp,tempp); inp += this->vecDim; bufp += this->classNb; @@ -62,6 +63,7 @@ predicts.reload(BayesF16::PREDICTS1_S16_ID,mgr); outputProbas.create(this->nbPatterns*this->classNb,BayesF16::OUT_PROBA_F16_ID,mgr); + temp.create(this->nbPatterns*this->classNb,BayesF16::OUT_PROBA_F16_ID,mgr); outputPredicts.create(this->nbPatterns,BayesF16::OUT_PREDICT_S16_ID,mgr); bayes.vectorDimension=this->vecDim; diff --git a/Testing/Source/Tests/BayesF32.cpp b/Testing/Source/Tests/BayesF32.cpp index eb4b3058..3504c27d 100755 --- a/Testing/Source/Tests/BayesF32.cpp +++ b/Testing/Source/Tests/BayesF32.cpp @@ -10,6 +10,7 @@ const float32_t *inp = input.ptr(); float32_t *bufp = outputProbas.ptr(); + float32_t *tempp = temp.ptr(); int16_t *p = outputPredicts.ptr(); @@ -17,7 +18,7 @@ { *p = arm_gaussian_naive_bayes_predict_f32(&bayes, inp, - bufp); + bufp,tempp); inp += this->vecDim; bufp += this->classNb; @@ -62,6 +63,8 @@ predicts.reload(BayesF32::PREDICTS1_S16_ID,mgr); outputProbas.create(this->nbPatterns*this->classNb,BayesF32::OUT_PROBA_F32_ID,mgr); + temp.create(this->nbPatterns*this->classNb,BayesF32::OUT_PROBA_F32_ID,mgr); + outputPredicts.create(this->nbPatterns,BayesF32::OUT_PREDICT_S16_ID,mgr); bayes.vectorDimension=this->vecDim;