diff --git a/Include/arm_helium_utils.h b/Include/arm_helium_utils.h index 826ecf84..915ad7ca 100755 --- a/Include/arm_helium_utils.h +++ b/Include/arm_helium_utils.h @@ -72,7 +72,7 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in) in = vaddq_f16(tmpVec, in); tmpVec = (float16x8_t) vrev64q_s32((int32x4_t) in); in = vaddq_f16(tmpVec, in); - acc = vgetq_lane_f16(in, 0) + vgetq_lane_f16(in, 4); + acc = (_Float16)vgetq_lane_f16(in, 0) + (_Float16)vgetq_lane_f16(in, 4); return acc; } diff --git a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c index 6503766d..c15bd83d 100755 --- a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c +++ b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f16.c @@ -67,7 +67,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins const float16_t *pIn = in; float16_t result; f16x8_t vsigma; - float16_t tmp; + _Float16 tmp; f16x8_t vacc1, vacc2; uint32_t index; float16_t logclassPriors[S->numberOfClasses]; @@ -81,15 +81,15 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins for (nbClass = 0; nbClass < S->numberOfClasses; nbClass++) { pIn = in; - vacc1 = vdupq_n_f16(0); - vacc2 = vdupq_n_f16(0); + vacc1 = vdupq_n_f16(0.0f16); + vacc2 = vdupq_n_f16(0.0f16); uint32_t blkCnt =S->vectorDimension >> 3; while (blkCnt > 0U) { f16x8_t vinvSigma, vtmp; vsigma = vaddq_n_f16(vld1q(pSigma), S->epsilon); - vacc1 = vaddq(vacc1, vlogq_f16(vmulq_n_f16(vsigma, 2.0f * PI))); + vacc1 = vaddq(vacc1, vlogq_f16(vmulq_n_f16(vsigma, 2.0f16 * (_Float16)PI))); vinvSigma = vrecip_medprec_f16(vsigma); @@ -112,7 +112,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins vsigma = vaddq_n_f16(vld1q(pSigma), S->epsilon); vacc1 = - vaddq_m_f16(vacc1, vacc1, vlogq_f16(vmulq_n_f16(vsigma, 2.0f * PI)), p0); + vaddq_m_f16(vacc1, vacc1, vlogq_f16(vmulq_n_f16(vsigma, 2.0f16 * (_Float16)PI)), p0); vinvSigma = vrecip_medprec_f16(vsigma); @@ -126,8 +126,8 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins pSigma += blkCnt; } - tmp = -0.5f * vecAddAcrossF16Mve(vacc1); - tmp -= 0.5f * vecAddAcrossF16Mve(vacc2); + tmp = -0.5f16 * (_Float16)vecAddAcrossF16Mve(vacc1); + tmp -= 0.5f16 * (_Float16)vecAddAcrossF16Mve(vacc2); *buffer = tmp + *pLogPrior++; buffer++; @@ -175,13 +175,13 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins pIn = in; - tmp = 0.0; - acc1 = 0.0f; - acc2 = 0.0f; + tmp = 0.0f16; + acc1 = 0.0f16; + acc2 = 0.0f16; for(nbDim = 0; nbDim < S->vectorDimension; nbDim++) { sigma = *pSigma + S->epsilon; - acc1 += logf(2.0f * PI_F * sigma); + acc1 += logf(2.0f16 * (_Float16)PI_F * sigma); acc2 += (*pIn - *pTheta) * (*pIn - *pTheta) / sigma; pIn++; @@ -189,8 +189,8 @@ uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_ins pSigma++; } - tmp = -0.5f * acc1; - tmp -= 0.5f * acc2; + tmp = -0.5f16 * acc1; + tmp -= 0.5f16 * acc2; *buffer = tmp + logf(*pPrior++); diff --git a/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c b/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c index 00ec3bdd..f13ebcdc 100755 --- a/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c @@ -172,7 +172,7 @@ void arm_cmplx_mag_f16( uint32_t numSamples) { uint32_t blkCnt; /* loop counter */ - float16_t real, imag; /* Temporary variables to hold input values */ + _Float16 real, imag; /* Temporary variables to hold input values */ #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) diff --git a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c index e110ace8..03e3696f 100755 --- a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c @@ -107,7 +107,7 @@ void arm_cmplx_mag_squared_f16( uint32_t numSamples) { uint32_t blkCnt; /* Loop counter */ - float16_t real, imag; /* Temporary input variables */ + _Float16 real, imag; /* Temporary input variables */ #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) diff --git a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c index 7f876a99..14fde480 100755 --- a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c @@ -103,7 +103,7 @@ void arm_cmplx_mult_cmplx_f16( pDst += 8; } - float16_t a, b, c, d; /* Temporary variables to store real and imaginary values */ + _Float16 a, b, c, d; /* Temporary variables to store real and imaginary values */ /* Tail */ blkCnt = (blockSize & 7) >> 1; while (blkCnt > 0) @@ -134,7 +134,7 @@ void arm_cmplx_mult_cmplx_f16( uint32_t numSamples) { uint32_t blkCnt; /* Loop counter */ - float16_t a, b, c, d; /* Temporary variables to store real and imaginary values */ + _Float16 a, b, c, d; /* Temporary variables to store real and imaginary values */ #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) diff --git a/Source/DistanceFunctions/arm_braycurtis_distance_f16.c b/Source/DistanceFunctions/arm_braycurtis_distance_f16.c index 8b182227..964c55af 100755 --- a/Source/DistanceFunctions/arm_braycurtis_distance_f16.c +++ b/Source/DistanceFunctions/arm_braycurtis_distance_f16.c @@ -72,7 +72,7 @@ float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize) { - float16_t accumDiff = 0.0f, accumSum = 0.0f; + _Float16 accumDiff = 0.0f, accumSum = 0.0f; uint32_t blkCnt; f16x8_t a, b, c, accumDiffV, accumSumV; @@ -125,14 +125,14 @@ float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, u float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize) { - float16_t accumDiff=0.0f, accumSum=0.0f, tmpA, tmpB; + _Float16 accumDiff=0.0f16, accumSum=0.0f16, tmpA, tmpB; while(blockSize > 0) { tmpA = *pA++; tmpB = *pB++; - accumDiff += fabsf(tmpA - tmpB); - accumSum += fabsf(tmpA + tmpB); + accumDiff += (_Float16)fabsf(tmpA - tmpB); + accumSum += (_Float16)fabsf(tmpA + tmpB); blockSize --; } /* diff --git a/Source/DistanceFunctions/arm_canberra_distance_f16.c b/Source/DistanceFunctions/arm_canberra_distance_f16.c index 0d27b967..228ede4c 100755 --- a/Source/DistanceFunctions/arm_canberra_distance_f16.c +++ b/Source/DistanceFunctions/arm_canberra_distance_f16.c @@ -70,7 +70,7 @@ float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize) { - float16_t accum = 0.0f; + _Float16 accum = 0.0f16; uint32_t blkCnt; f16x8_t a, b, c, accumV; @@ -141,7 +141,7 @@ float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uin #else float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize) { - float16_t accum=0.0f, tmpA, tmpB,diff,sum; + _Float16 accum=0.0f, tmpA, tmpB,diff,sum; while(blockSize > 0) { @@ -150,7 +150,7 @@ float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uin diff = fabsf(tmpA - tmpB); sum = fabsf(tmpA) + fabsf(tmpB); - if ((tmpA != 0.0f) || (tmpB != 0.0f)) + if ((tmpA != 0.0f16) || (tmpB != 0.0f16)) { accum += (diff / sum); } diff --git a/Source/DistanceFunctions/arm_chebyshev_distance_f16.c b/Source/DistanceFunctions/arm_chebyshev_distance_f16.c index eb43fdbb..d97e255e 100755 --- a/Source/DistanceFunctions/arm_chebyshev_distance_f16.c +++ b/Source/DistanceFunctions/arm_chebyshev_distance_f16.c @@ -67,7 +67,7 @@ float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, ui uint32_t blkCnt; /* loop counters */ f16x8_t vecA, vecB; f16x8_t vecDiff = vdupq_n_f16(0.0); - float16_t maxValue = 0.0; + float16_t maxValue = 0.0f16; blkCnt = blockSize >> 3; @@ -111,7 +111,7 @@ float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, ui #else float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize) { - float16_t diff=0.0f, maxVal,tmpA, tmpB; + _Float16 diff=0.0f, maxVal,tmpA, tmpB; tmpA = *pA++; tmpB = *pB++; diff --git a/Source/DistanceFunctions/arm_cityblock_distance_f16.c b/Source/DistanceFunctions/arm_cityblock_distance_f16.c index 49bde0a3..6f961388 100755 --- a/Source/DistanceFunctions/arm_cityblock_distance_f16.c +++ b/Source/DistanceFunctions/arm_cityblock_distance_f16.c @@ -102,14 +102,14 @@ float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, ui #else float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize) { - float16_t accum,tmpA, tmpB; + _Float16 accum,tmpA, tmpB; - accum = 0.0f; + accum = 0.0f16; while(blockSize > 0) { tmpA = *pA++; tmpB = *pB++; - accum += fabsf(tmpA - tmpB); + accum += (_Float16)fabsf(tmpA - tmpB); blockSize --; } diff --git a/Source/DistanceFunctions/arm_euclidean_distance_f16.c b/Source/DistanceFunctions/arm_euclidean_distance_f16.c index 86dd06aa..7a71fd51 100755 --- a/Source/DistanceFunctions/arm_euclidean_distance_f16.c +++ b/Source/DistanceFunctions/arm_euclidean_distance_f16.c @@ -105,16 +105,17 @@ float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, ui #else float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize) { - float16_t accum=0.0f,tmp; + _Float16 accum=0.0f,tmp; + float16_t result; while(blockSize > 0) { - tmp = *pA++ - *pB++; + tmp = (_Float16)*pA++ - (_Float16)*pB++; accum += SQ(tmp); blockSize --; } - arm_sqrt_f16(accum,&tmp); - return(tmp); + arm_sqrt_f16(accum,&result); + return(result); } #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ diff --git a/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c b/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c index ecdbbcb1..4e4ce434 100755 --- a/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c +++ b/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c @@ -145,16 +145,17 @@ float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize) { - float16_t left, right,sum, result, tmp; + _Float16 left, right,sum, tmp; + float16_t result; uint32_t i; - left = 0.0f; - right = 0.0f; + left = 0.0f16; + right = 0.0f16; for(i=0; i < blockSize; i++) { - tmp = (pA[i] + pB[i]) / 2.0f; - left += rel_entr(pA[i], tmp); - right += rel_entr(pB[i], tmp); + tmp = ((_Float16)pA[i] + (_Float16)pB[i]) / 2.0f16; + left += (_Float16)rel_entr(pA[i], tmp); + right += (_Float16)rel_entr(pB[i], tmp); } diff --git a/Source/DistanceFunctions/arm_minkowski_distance_f16.c b/Source/DistanceFunctions/arm_minkowski_distance_f16.c index 8df29655..def1569a 100755 --- a/Source/DistanceFunctions/arm_minkowski_distance_f16.c +++ b/Source/DistanceFunctions/arm_minkowski_distance_f16.c @@ -111,13 +111,13 @@ float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, in float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize) { - float16_t sum; + _Float16 sum; uint32_t i; sum = 0.0f; for(i=0; i < blockSize; i++) { - sum += powf(fabsf(pA[i] - pB[i]),order); + sum += (_Float16)powf(fabsf(pA[i] - pB[i]),order); } diff --git a/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c b/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c index ad274f1f..575a8c9e 100755 --- a/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_f16.c @@ -64,7 +64,7 @@ void arm_biquad_cascade_df1_f16( float16_t Xn1, Xn2, Yn1, Yn2; /* Filter pState variables */ float16_t X0, X1, X2, X3; /* temporary input */ float16_t X4, X5, X6, X7; /* temporary input */ - float16_t lastX, lastY; /* X,Y history for tail handling */ + _Float16 lastX, lastY; /* X,Y history for tail handling */ f16x8_t coeffs; f16x8_t accVec; /* accumultor vector */ uint32_t sample, stage = S->numStages; /* loop counters */ @@ -320,10 +320,10 @@ void arm_biquad_cascade_df1_f16( float16_t *pOut = pDst; /* Destination pointer */ float16_t *pState = S->pState; /* pState pointer */ const float16_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ - float16_t acc; /* Accumulator */ - float16_t b0, b1, b2, a1, a2; /* Filter coefficients */ - float16_t Xn1, Xn2, Yn1, Yn2; /* Filter pState variables */ - float16_t Xn; /* Temporary input */ + _Float16 acc; /* Accumulator */ + _Float16 b0, b1, b2, a1, a2; /* Filter coefficients */ + _Float16 Xn1, Xn2, Yn1, Yn2; /* Filter pState variables */ + _Float16 Xn; /* Temporary input */ uint32_t sample, stage = S->numStages; /* Loop counters */ do diff --git a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c index 2f185587..856463ae 100755 --- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c @@ -197,10 +197,10 @@ void arm_biquad_cascade_df2T_f16( float16_t *pOut = pDst; /* Destination pointer */ float16_t *pState = S->pState; /* State pointer */ const float16_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ - float16_t acc1; /* Accumulator */ - float16_t b0, b1, b2, a1, a2; /* Filter coefficients */ - float16_t Xn1; /* Temporary input */ - float16_t d1, d2; /* State variables */ + _Float16 acc1; /* Accumulator */ + _Float16 b0, b1, b2, a1, a2; /* Filter coefficients */ + _Float16 Xn1; /* Temporary input */ + _Float16 d1, d2; /* State variables */ uint32_t sample, stage = S->numStages; /* Loop counters */ do diff --git a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c index 12afdbbe..11f4e6ef 100755 --- a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c @@ -198,10 +198,10 @@ void arm_biquad_cascade_stereo_df2T_f16( float16_t *pOut = pDst; /* Destination pointer */ float16_t *pState = S->pState; /* State pointer */ const float16_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ - float16_t acc1a, acc1b; /* Accumulator */ - float16_t b0, b1, b2, a1, a2; /* Filter coefficients */ - float16_t Xn1a, Xn1b; /* Temporary input */ - float16_t d1a, d2a, d1b, d2b; /* State variables */ + _Float16 acc1a, acc1b; /* Accumulator */ + _Float16 b0, b1, b2, a1, a2; /* Filter coefficients */ + _Float16 Xn1a, Xn1b; /* Temporary input */ + _Float16 d1a, d2a, d1b, d2b; /* State variables */ uint32_t sample, stage = S->numStages; /* Loop counters */ do diff --git a/Source/FilteringFunctions/arm_correlate_f16.c b/Source/FilteringFunctions/arm_correlate_f16.c index 80b4e742..d9cb61cb 100755 --- a/Source/FilteringFunctions/arm_correlate_f16.c +++ b/Source/FilteringFunctions/arm_correlate_f16.c @@ -370,8 +370,8 @@ void arm_correlate_f16( for (i = 0U; i <= block1 - 2; i += 2) { uint32_t count = i + 1; - float16_t acc0; - float16_t acc1; + _Float16 acc0; + _Float16 acc1; /* * compute 2 accumulators per loop * size is incrementing for second accumulator @@ -390,7 +390,7 @@ void arm_correlate_f16( for (; i < block1; i++) { uint32_t count = i + 1; - float16_t acc; + _Float16 acc; pX = pA; pY = pB; @@ -403,10 +403,10 @@ void arm_correlate_f16( for (i = 0U; i <= block2 - 4; i += 4) { - float16_t acc0; - float16_t acc1; - float16_t acc2; - float16_t acc3; + _Float16 acc0; + _Float16 acc1; + _Float16 acc2; + _Float16 acc3; pX = pA; pY = pB; @@ -430,8 +430,8 @@ void arm_correlate_f16( for (; i <= block2 - 2; i += 2) { - float16_t acc0; - float16_t acc1; + _Float16 acc0; + _Float16 acc1; pX = pA; pY = pB; @@ -451,7 +451,7 @@ void arm_correlate_f16( if (block2 & 1) { - float16_t acc; + _Float16 acc; pX = pA; pY = pB; @@ -466,8 +466,8 @@ void arm_correlate_f16( { uint32_t count = (i + 1); - float16_t acc0; - float16_t acc1; + _Float16 acc0; + _Float16 acc1; pX = pA; pY = pB; @@ -488,7 +488,7 @@ void arm_correlate_f16( for (; i >= 0; i--) { uint32_t count = (i + 1); - float16_t acc; + _Float16 acc; pX = pA; pY = pB; @@ -517,15 +517,15 @@ void arm_correlate_f16( const float16_t *px; /* Intermediate inputA pointer */ const float16_t *py; /* Intermediate inputB pointer */ const float16_t *pSrc1; - float16_t sum; + _Float16 sum; uint32_t blockSize1, blockSize2, blockSize3; /* Loop counters */ uint32_t j, k, count, blkCnt; /* Loop counters */ uint32_t outBlockSize; /* Loop counter */ int32_t inc = 1; /* Destination address modifier */ #if defined (ARM_MATH_LOOPUNROLL) - float16_t acc0, acc1, acc2, acc3,c0; /* Accumulators */ - float16_t x0, x1, x2, x3; /* temporary variables for holding input and coefficient values */ + _Float16 acc0, acc1, acc2, acc3,c0; /* Accumulators */ + _Float16 x0, x1, x2, x3; /* temporary variables for holding input and coefficient values */ #endif /* The algorithm implementation is based on the lengths of the inputs. */ @@ -625,7 +625,7 @@ void arm_correlate_f16( while (blockSize1 > 0U) { /* Accumulator is made zero for every iteration */ - sum = 0.0f; + sum = 0.0f16; #if defined (ARM_MATH_LOOPUNROLL) @@ -725,10 +725,10 @@ void arm_correlate_f16( while (blkCnt > 0U) { /* Set all accumulators to zero */ - acc0 = 0.0f; - acc1 = 0.0f; - acc2 = 0.0f; - acc3 = 0.0f; + acc0 = 0.0f16; + acc1 = 0.0f16; + acc2 = 0.0f16; + acc3 = 0.0f16; /* read x[0], x[1], x[2] samples */ @@ -873,7 +873,7 @@ void arm_correlate_f16( while (blkCnt > 0U) { /* Accumulator is made zero for every iteration */ - sum = 0.0f; + sum = 0.0f16; #if defined (ARM_MATH_LOOPUNROLL) @@ -939,7 +939,7 @@ void arm_correlate_f16( while (blkCnt > 0U) { /* Accumulator is made zero for every iteration */ - sum = 0.0f; + sum = 0.0f16; /* Loop over srcBLen */ k = srcBLen; @@ -1000,7 +1000,7 @@ void arm_correlate_f16( while (blockSize3 > 0U) { /* Accumulator is made zero for every iteration */ - sum = 0.0f; + sum = 0.0f16; #if defined (ARM_MATH_LOOPUNROLL) @@ -1069,7 +1069,7 @@ void arm_correlate_f16( const float16_t *pIn1 = pSrcA; /* inputA pointer */ const float16_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */ - float16_t sum; /* Accumulator */ + _Float16 sum; /* Accumulator */ uint32_t i = 0U, j; /* Loop counters */ uint32_t inv = 0U; /* Reverse order flag */ uint32_t tot = 0U; /* Length */ @@ -1127,7 +1127,7 @@ void arm_correlate_f16( for (i = 0U; i <= tot; i++) { /* Initialize sum with zero to carry out MAC operations */ - sum = 0.0f; + sum = 0.0f16; /* Loop to perform MAC operations according to convolution equation */ for (j = 0U; j <= i; j++) diff --git a/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c b/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c index 5f400aff..8f749207 100755 --- a/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c +++ b/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c @@ -711,13 +711,13 @@ arm_status arm_mat_cmplx_mult_f16( uint16_t numRowsA = pSrcA->numRows; /* Number of rows of input matrix A */ uint16_t numColsB = pSrcB->numCols; /* Number of columns of input matrix B */ uint16_t numColsA = pSrcA->numCols; /* Number of columns of input matrix A */ - float16_t sumReal, sumImag; /* Accumulator */ - float16_t a1, b1, c1, d1; + _Float16 sumReal, sumImag; /* Accumulator */ + _Float16 a1, b1, c1, d1; uint32_t col, i = 0U, j, row = numRowsA, colCnt; /* loop counters */ arm_status status; /* status of matrix multiplication */ #if defined (ARM_MATH_LOOPUNROLL) - float16_t a0, b0, c0, d0; + _Float16 a0, b0, c0, d0; #endif #ifdef ARM_MATH_MATRIX_CHECK @@ -755,8 +755,8 @@ arm_status arm_mat_cmplx_mult_f16( do { /* Set the variable sum, that acts as accumulator, to zero */ - sumReal = 0.0f; - sumImag = 0.0f; + sumReal = 0.0f16; + sumImag = 0.0f16; /* Initiate pointer pIn1 to point to starting address of column being processed */ pIn1 = pInA; diff --git a/Source/MatrixFunctions/arm_mat_inverse_f16.c b/Source/MatrixFunctions/arm_mat_inverse_f16.c index 42fb7c75..85adf6f4 100755 --- a/Source/MatrixFunctions/arm_mat_inverse_f16.c +++ b/Source/MatrixFunctions/arm_mat_inverse_f16.c @@ -66,7 +66,7 @@ arm_status arm_mat_inverse_f16( uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */ float16_t *pTmpA, *pTmpB; - float16_t in = 0.0f; /* Temporary input values */ + _Float16 in = 0.0f16; /* Temporary input values */ uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l; /* loop counters */ arm_status status; /* status of matrix inverse */ uint32_t blkCnt; @@ -137,20 +137,20 @@ arm_status arm_mat_inverse_f16( j = numRows - rowCnt; while (j > 0U) { - *pOutT1++ = 0.0f; + *pOutT1++ = 0.0f16; j--; } /* * Writing all ones in the diagonal of the destination matrix */ - *pOutT1++ = 1.0f; + *pOutT1++ = 1.0f16; /* * Writing all zeroes in upper triangle of the destination matrix */ j = rowCnt - 1U; while (j > 0U) { - *pOutT1++ = 0.0f; + *pOutT1++ = 0.0f16; j--; } /* @@ -199,7 +199,7 @@ arm_status arm_mat_inverse_f16( /* * Check if the pivot element is zero */ - if (*pInT1 == 0.0f) + if (*pInT1 == 0.0f16) { /* * Loop over the number rows present below @@ -215,7 +215,7 @@ arm_status arm_mat_inverse_f16( * Check if there is a non zero pivot element to * * replace in the rows below */ - if (*pInT2 != 0.0f) + if (*pInT2 != 0.0f16) { f16x8_t vecA, vecB; /* @@ -310,7 +310,7 @@ arm_status arm_mat_inverse_f16( /* * Update the status if the matrix is singular */ - if ((flag != 1U) && (in == 0.0f)) + if ((flag != 1U) && (in == 0.0f16)) { return ARM_MATH_SINGULAR; } @@ -334,7 +334,7 @@ arm_status arm_mat_inverse_f16( pTmpA = pInT1; - f16x8_t invIn = vdupq_n_f16(1.0f / in); + f16x8_t invIn = vdupq_n_f16(1.0f16 / in); blkCnt = (numCols - l) >> 3; f16x8_t vecA; @@ -537,12 +537,12 @@ arm_status arm_mat_inverse_f16( */ status = ARM_MATH_SUCCESS; - if ((flag != 1U) && (in == 0.0f)) + if ((flag != 1U) && (in == 0.0f16)) { pIn = pSrc->pData; for (i = 0; i < numRows * numCols; i++) { - if (pIn[i] != 0.0f) + if (pIn[i] != 0.0f16) break; } @@ -568,7 +568,7 @@ arm_status arm_mat_inverse_f16( uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */ uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */ - float16_t Xchg, in = 0.0f, in1; /* Temporary input values */ + _Float16 Xchg, in = 0.0f16, in1; /* Temporary input values */ uint32_t i, rowCnt, flag = 0U, j, loopCnt, k, l; /* loop counters */ arm_status status; /* status of matrix inverse */ @@ -636,18 +636,18 @@ arm_status arm_mat_inverse_f16( j = numRows - rowCnt; while (j > 0U) { - *pOutT1++ = 0.0f; + *pOutT1++ = 0.0f16; j--; } /* Writing all ones in the diagonal of the destination matrix */ - *pOutT1++ = 1.0f; + *pOutT1++ = 1.0f16; /* Writing all zeroes in upper triangle of the destination matrix */ j = rowCnt - 1U; while (j > 0U) { - *pOutT1++ = 0.0f; + *pOutT1++ = 0.0f16; j--; } @@ -685,7 +685,7 @@ arm_status arm_mat_inverse_f16( k = 1U; /* Check if the pivot element is zero */ - if (*pInT1 == 0.0f) + if (*pInT1 == 0.0f16) { /* Loop over the number rows present below */ @@ -697,7 +697,7 @@ arm_status arm_mat_inverse_f16( /* Check if there is a non zero pivot element to * replace in the rows below */ - if (*pInT2 != 0.0f) + if (*pInT2 != 0.0f16) { /* Loop over number of columns * to the right of the pilot element */ @@ -743,7 +743,7 @@ arm_status arm_mat_inverse_f16( } /* Update the status if the matrix is singular */ - if ((flag != 1U) && (in == 0.0f)) + if ((flag != 1U) && (in == 0.0f16)) { return ARM_MATH_SINGULAR; } @@ -877,12 +877,12 @@ arm_status arm_mat_inverse_f16( /* Set status as ARM_MATH_SUCCESS */ status = ARM_MATH_SUCCESS; - if ((flag != 1U) && (in == 0.0f)) + if ((flag != 1U) && (in == 0.0f16)) { pIn = pSrc->pData; for (i = 0; i < numRows * numCols; i++) { - if (pIn[i] != 0.0f) + if (pIn[i] != 0.0f16) break; } diff --git a/Source/MatrixFunctions/arm_mat_mult_f16.c b/Source/MatrixFunctions/arm_mat_mult_f16.c index 797adf54..657d6188 100755 --- a/Source/MatrixFunctions/arm_mat_mult_f16.c +++ b/Source/MatrixFunctions/arm_mat_mult_f16.c @@ -632,7 +632,7 @@ arm_status arm_mat_mult_f16( float16_t *pInB = pSrcB->pData; /* Input data matrix pointer B */ float16_t *pOut = pDst->pData; /* Output data matrix pointer */ float16_t *px; /* Temporary output data matrix pointer */ - float16_t sum; /* Accumulator */ + _Float16 sum; /* Accumulator */ uint16_t numRowsA = pSrcA->numRows; /* Number of rows of input matrix A */ uint16_t numColsB = pSrcB->numCols; /* Number of columns of input matrix B */ uint16_t numColsA = pSrcA->numCols; /* Number of columns of input matrix A */ @@ -671,7 +671,7 @@ arm_status arm_mat_mult_f16( do { /* Set the variable sum, that acts as accumulator, to zero */ - sum = 0.0f; + sum = 0.0f16; /* Initialize pointer pIn1 to point to starting address of column being processed */ pIn1 = pInA; diff --git a/Source/SVMFunctions/arm_svm_linear_predict_f16.c b/Source/SVMFunctions/arm_svm_linear_predict_f16.c index 79638527..54748f03 100755 --- a/Source/SVMFunctions/arm_svm_linear_predict_f16.c +++ b/Source/SVMFunctions/arm_svm_linear_predict_f16.c @@ -151,7 +151,7 @@ void arm_svm_linear_predict_f16( acc0 = vfmaq_n_f16(acc0,acc2,*pDualCoef++); acc0 = vfmaq_n_f16(acc0,acc3,*pDualCoef++); - sum += vecAddAcrossF16Mve(acc0); + sum += (_Float16)vecAddAcrossF16Mve(acc0); pSrcA += numCols * 4; /* @@ -221,7 +221,7 @@ void arm_svm_linear_predict_f16( acc0 = vmulq_n_f16(acc0,*pDualCoef++); acc0 = vfmaq_n_f16(acc0,acc1,*pDualCoef++); - sum += vecAddAcrossF16Mve(acc0); + sum += (_Float16)vecAddAcrossF16Mve(acc0); pSrcA += numCols * 2; row -= 2; @@ -274,7 +274,7 @@ void arm_svm_linear_predict_f16( /* * Sum the partial parts */ - sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc0); + sum += (_Float16)*pDualCoef++ * (_Float16)vecAddAcrossF16Mve(acc0); } diff --git a/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c b/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c index 31848cf6..52715095 100755 --- a/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c +++ b/Source/SVMFunctions/arm_svm_polynomial_predict_f16.c @@ -294,7 +294,7 @@ void arm_svm_polynomial_predict_f16( (vaddq_n_f16(vmulq_n_f16(vtmp, S->gamma), S->coef0), S->degree), vctp16q(1)); } - sum += vecAddAcrossF16Mve(vSum); + sum += (_Float16)vecAddAcrossF16Mve(vSum); *pResult = S->classes[STEP(sum)]; @@ -316,9 +316,9 @@ void arm_svm_polynomial_predict_f16( dot=0; for(j=0; j < S->vectorDimension; j++) { - dot = dot + in[j]* *pSupport++; + dot = dot + (_Float16)in[j]* (_Float16)*pSupport++; } - sum += S->dualCoefficients[i] * arm_exponent_f16(S->gamma * dot + S->coef0, S->degree); + sum += S->dualCoefficients[i] * (_Float16)arm_exponent_f16(S->gamma * dot + S->coef0, S->degree); } *pResult=S->classes[STEP(sum)]; diff --git a/Source/SVMFunctions/arm_svm_rbf_predict_f16.c b/Source/SVMFunctions/arm_svm_rbf_predict_f16.c index aab5885f..831c972b 100755 --- a/Source/SVMFunctions/arm_svm_rbf_predict_f16.c +++ b/Source/SVMFunctions/arm_svm_rbf_predict_f16.c @@ -67,7 +67,7 @@ void arm_svm_rbf_predict_f16( uint32_t row; uint32_t blkCnt; /* loop counters */ const float16_t *pDualCoef = S->dualCoefficients; - float16_t sum = S->intercept; + _Float16 sum = S->intercept; f16x8_t vSum = vdupq_n_f16(0); row = numRows; @@ -323,19 +323,19 @@ void arm_svm_rbf_predict_f16( int32_t * pResult) { _Float16 sum=S->intercept; - _Float16 dot=0; + _Float16 dot=00.f16; uint32_t i,j; const float16_t *pSupport = S->supportVectors; for(i=0; i < S->nbOfSupportVectors; i++) { - dot=0; + dot=0.0f16; for(j=0; j < S->vectorDimension; j++) { - dot = dot + SQ(in[j] - *pSupport); + dot = dot + SQ((_Float16)in[j] - (_Float16) *pSupport); pSupport++; } - sum += S->dualCoefficients[i] * expf(-S->gamma * dot); + sum += (_Float16)S->dualCoefficients[i] * (_Float16)expf(-(_Float16)S->gamma * dot); } *pResult=S->classes[STEP(sum)]; } diff --git a/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c b/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c index dc6771ce..4840ca34 100755 --- a/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c +++ b/Source/SVMFunctions/arm_svm_sigmoid_predict_f16.c @@ -67,7 +67,7 @@ void arm_svm_sigmoid_predict_f16( uint32_t row; uint32_t blkCnt; /* loop counters */ const float16_t *pDualCoef = S->dualCoefficients; - float16_t sum = S->intercept; + _Float16 sum = S->intercept; f16x8_t vSum = vdupq_n_f16(0.0f); row = numRows; @@ -305,18 +305,18 @@ void arm_svm_sigmoid_predict_f16( int32_t * pResult) { _Float16 sum=S->intercept; - _Float16 dot=0; + _Float16 dot=0.0f16; uint32_t i,j; const float16_t *pSupport = S->supportVectors; for(i=0; i < S->nbOfSupportVectors; i++) { - dot=0; + dot=0.0f16; for(j=0; j < S->vectorDimension; j++) { - dot = dot + in[j]* *pSupport++; + dot = dot + (_Float16)in[j] * (_Float16)*pSupport++; } - sum += S->dualCoefficients[i] * tanhf(S->gamma * dot + S->coef0); + sum += (_Float16)S->dualCoefficients[i] * (_Float16)tanhf((_Float16)S->gamma * dot + (_Float16)S->coef0); } *pResult=S->classes[STEP(sum)]; } diff --git a/Source/StatisticsFunctions/arm_entropy_f16.c b/Source/StatisticsFunctions/arm_entropy_f16.c index a3501782..2b7cd141 100755 --- a/Source/StatisticsFunctions/arm_entropy_f16.c +++ b/Source/StatisticsFunctions/arm_entropy_f16.c @@ -65,7 +65,7 @@ float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize) { uint32_t blkCnt; - float16_t accum=0.0f,p; + _Float16 accum=0.0f16,p; blkCnt = blockSize; @@ -110,7 +110,7 @@ float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize) { const float16_t *pIn; uint32_t blkCnt; - float16_t accum, p; + _Float16 accum, p; pIn = pSrcA; blkCnt = blockSize; diff --git a/Source/StatisticsFunctions/arm_kullback_leibler_f16.c b/Source/StatisticsFunctions/arm_kullback_leibler_f16.c index 06f4ba65..2252a821 100755 --- a/Source/StatisticsFunctions/arm_kullback_leibler_f16.c +++ b/Source/StatisticsFunctions/arm_kullback_leibler_f16.c @@ -72,12 +72,12 @@ float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize) { uint32_t blkCnt; - float16_t accum, pA,pB; + _Float16 accum, pA,pB; blkCnt = blockSize; - accum = 0.0f; + accum = 0.0f16; f16x8_t vSum = vdupq_n_f16(0.0f); blkCnt = blockSize >> 3; @@ -120,7 +120,7 @@ float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSr { const float16_t *pInA, *pInB; uint32_t blkCnt; - float16_t accum, pA,pB; + _Float16 accum, pA,pB; pInA = pSrcA; pInB = pSrcB; diff --git a/Source/StatisticsFunctions/arm_logsumexp_f16.c b/Source/StatisticsFunctions/arm_logsumexp_f16.c index c851bf7e..dba2671f 100755 --- a/Source/StatisticsFunctions/arm_logsumexp_f16.c +++ b/Source/StatisticsFunctions/arm_logsumexp_f16.c @@ -72,8 +72,8 @@ float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize) float16_t maxVal; const float16_t *pIn; int32_t blkCnt; - float16_t accum=0.0f16; - float16_t tmp; + _Float16 accum=0.0f16; + _Float16 tmp; arm_max_no_idx_f16((float16_t *) in, blockSize, &maxVal); @@ -122,11 +122,11 @@ float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize) #else float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize) { - float16_t maxVal; - float16_t tmp; + _Float16 maxVal; + _Float16 tmp; const float16_t *pIn; uint32_t blkCnt; - float16_t accum; + _Float16 accum; pIn = in; blkCnt = blockSize; diff --git a/Source/StatisticsFunctions/arm_power_f16.c b/Source/StatisticsFunctions/arm_power_f16.c index e40bc521..1d1a9959 100755 --- a/Source/StatisticsFunctions/arm_power_f16.c +++ b/Source/StatisticsFunctions/arm_power_f16.c @@ -88,8 +88,8 @@ void arm_power_f16( float16_t * pResult) { uint32_t blkCnt; /* Loop counter */ - float16_t sum = 0.0f; /* Temporary result storage */ - float16_t in; /* Temporary variable to store input value */ + _Float16 sum = 0.0f16; /* Temporary result storage */ + _Float16 in; /* Temporary variable to store input value */ #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) diff --git a/Source/StatisticsFunctions/arm_rms_f16.c b/Source/StatisticsFunctions/arm_rms_f16.c index 15f1a47b..63d5dcf5 100755 --- a/Source/StatisticsFunctions/arm_rms_f16.c +++ b/Source/StatisticsFunctions/arm_rms_f16.c @@ -83,8 +83,8 @@ void arm_rms_f16( float16_t * pResult) { uint32_t blkCnt; /* Loop counter */ - float16_t sum = 0.0f; /* Temporary result storage */ - float16_t in; /* Temporary variable to store input value */ + _Float16 sum = 0.0f16; /* Temporary result storage */ + _Float16 in; /* Temporary variable to store input value */ #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) diff --git a/Testing/FrameworkSource/Error.cpp b/Testing/FrameworkSource/Error.cpp index bd9a4889..bfadbc6f 100644 --- a/Testing/FrameworkSource/Error.cpp +++ b/Testing/FrameworkSource/Error.cpp @@ -342,7 +342,7 @@ void assert_close_error(unsigned long nb,float64_t &ref, float64_t &val, double if (abs(val - ref) > (absthreshold + relthreshold * abs(ref))) { char details[200]; - sprintf(details,"close %g : abs=%g, rel=%g",abs(val - ref) , absthreshold,relthreshold); + sprintf(details,"close error %g > %g: (val = %g, ref = %g)",abs(val - ref) , absthreshold + relthreshold * abs(ref),val,ref); throw (Error(CLOSE_ERROR,nb,details)); } }; @@ -385,7 +385,7 @@ void assert_close_error(unsigned long nb,float32_t &ref, float32_t &val, double if (abs(val - ref) > (absthreshold + relthreshold * abs(ref))) { char details[200]; - sprintf(details,"close %g : abs=%g, rel=%g",abs(val - ref) , absthreshold,relthreshold); + sprintf(details,"close error %g > %g: (val = %g, ref = %g)",abs(val - ref) , absthreshold + relthreshold * abs(ref),val,ref); throw (Error(CLOSE_ERROR,nb,details)); } }; @@ -429,7 +429,7 @@ void assert_close_error(unsigned long nb,float16_t &ref, float16_t &val, double if (abs((float)val - (float)ref) > (absthreshold + relthreshold * abs((float)ref))) { char details[200]; - sprintf(details,"close %g : abs=%g, rel=%g",abs(val - ref) , absthreshold,relthreshold); + sprintf(details,"close error %g > %g: (val = %g, ref = %g)",abs(val - ref) , absthreshold + relthreshold * abs(ref),val,ref); throw (Error(CLOSE_ERROR,nb,details)); } }; diff --git a/Testing/PatternGeneration/Support.py b/Testing/PatternGeneration/Support.py index 81b5ebce..96d15e8f 100755 --- a/Testing/PatternGeneration/Support.py +++ b/Testing/PatternGeneration/Support.py @@ -11,7 +11,7 @@ NBVECTORS = [4,10,16] VECDIM = [12,14,20] -def genWsum(config,nb): +def genWsum(config,f,nb): DIM=50 inputs=[] weights=[] @@ -23,15 +23,15 @@ def genWsum(config,nb): inputs += list(va) weights += list(vb) - nbiters = Tools.loopnb(0,Tools.TAILONLY) + nbiters = Tools.loopnb(f,Tools.TAILONLY) e = np.sum(va[0:nbiters].T * vb[0:nbiters]) / np.sum(vb[0:nbiters]) output.append(e) - nbiters = Tools.loopnb(0,Tools.BODYONLY) + nbiters = Tools.loopnb(f,Tools.BODYONLY) e = np.sum(va[0:nbiters].T * vb[0:nbiters]) / np.sum(vb[0:nbiters]) output.append(e) - nbiters = Tools.loopnb(0,Tools.BODYANDTAIL) + nbiters = Tools.loopnb(f,Tools.BODYANDTAIL) e = np.sum(va[0:nbiters].T * vb[0:nbiters]) / np.sum(vb[0:nbiters]) output.append(e) @@ -92,7 +92,7 @@ def writeTestsF32(config): # This is for benchmarking the weighted sum and we use only one test pattern - genWsum(config,6) + genWsum(config,Tools.F32,6) def writeTestsF16(config): @@ -105,7 +105,7 @@ def writeTestsF16(config): config.writeInput(11,va,"Samples") # This is for benchmarking the weighted sum and we use only one test pattern - genWsum(config,6) + genWsum(config,Tools.F16,6) def writeTestsQ31(config): NBSAMPLES=256 diff --git a/Testing/Patterns/DSP/Support/SupportF16/Inputs6_f16.txt b/Testing/Patterns/DSP/Support/SupportF16/Inputs6_f16.txt index 417c4297..c0477372 100755 --- a/Testing/Patterns/DSP/Support/SupportF16/Inputs6_f16.txt +++ b/Testing/Patterns/DSP/Support/SupportF16/Inputs6_f16.txt @@ -1,102 +1,102 @@ H 50 -// 0.549118 -0x3865 -// 0.782574 -0x3a43 -// 0.537389 -0x384d -// 0.576597 -0x389d -// 0.849496 -0x3acc -// 0.764850 -0x3a1e -// 0.539151 -0x3850 -// 0.459163 -0x3759 -// 0.352476 -0x35a4 -// 0.333343 -0x3555 -// 0.362662 -0x35cd -// 0.343928 -0x3581 -// 0.046084 -0x29e6 -// 0.583088 -0x38aa -// 0.464789 -0x3770 -// 0.659567 -0x3947 -// 0.177360 -0x31ad -// 0.465121 -0x3771 -// 0.849010 -0x3acb -// 0.748383 -0x39fd -// 0.493254 -0x37e4 -// 0.023579 -0x2609 -// 0.354457 -0x35ac -// 0.880690 -0x3b0c -// 0.029813 -0x27a2 -// 0.338038 -0x3569 -// 0.724386 -0x39cc -// 0.341454 -0x3577 -// 0.676073 -0x3969 -// 0.041176 -0x2945 -// 0.214019 -0x32d9 -// 0.322539 -0x3529 -// 0.499026 -0x37fc -// 0.616038 -0x38ee -// 0.688882 -0x3983 -// 0.066439 -0x2c41 -// 0.094981 -0x2e14 -// 0.873092 -0x3afc -// 0.642707 -0x3924 -// 0.537527 -0x384d -// 0.370953 -0x35ef -// 0.450042 -0x3733 -// 0.434679 -0x36f4 -// 0.676789 -0x396a -// 0.303649 -0x34dc -// 0.076930 -0x2cec -// 0.871305 -0x3af8 -// 0.855053 -0x3ad7 -// 0.498935 -0x37fc -// 0.854980 -0x3ad7 +// 0.145259 +0x30a6 +// 0.431775 +0x36e9 +// 0.925328 +0x3b67 +// 0.170239 +0x3173 +// 0.385773 +0x362c +// 0.998601 +0x3bfd +// 0.108755 +0x2ef6 +// 0.479921 +0x37ae +// 0.592423 +0x38bd +// 0.431234 +0x36e6 +// 0.413843 +0x369f +// 0.959014 +0x3bac +// 0.665528 +0x3953 +// 0.952834 +0x3b9f +// 0.562821 +0x3881 +// 0.609152 +0x38e0 +// 0.890135 +0x3b1f +// 0.002624 +0x1960 +// 0.666853 +0x3956 +// 0.813988 +0x3a83 +// 0.170387 +0x3174 +// 0.264551 +0x343c +// 0.699111 +0x3998 +// 0.738154 +0x39e8 +// 0.475053 +0x379a +// 0.439282 +0x3707 +// 0.699736 +0x3999 +// 0.488956 +0x37d3 +// 0.115565 +0x2f65 +// 0.115452 +0x2f64 +// 0.586420 +0x38b1 +// 0.327648 +0x353e +// 0.442207 +0x3713 +// 0.752186 +0x3a04 +// 0.476363 +0x379f +// 0.491094 +0x37dc +// 0.552734 +0x386c +// 0.674894 +0x3966 +// 0.705545 +0x39a5 +// 0.966659 +0x3bbc +// 0.528146 +0x383a +// 0.633660 +0x3912 +// 0.049129 +0x2a4a +// 0.458540 +0x3756 +// 0.331162 +0x354c +// 0.399161 +0x3663 +// 0.015650 +0x2402 +// 0.271388 +0x3458 +// 0.912530 +0x3b4d +// 0.655578 +0x393f diff --git a/Testing/Patterns/DSP/Support/SupportF16/Ref6_f16.txt b/Testing/Patterns/DSP/Support/SupportF16/Ref6_f16.txt index 3d238667..6a0d15db 100755 --- a/Testing/Patterns/DSP/Support/SupportF16/Ref6_f16.txt +++ b/Testing/Patterns/DSP/Support/SupportF16/Ref6_f16.txt @@ -1,8 +1,8 @@ H 3 -// 0.544937 -0x385c -// 0.581009 -0x38a6 -// 0.504352 -0x3809 +// 0.445338 +0x3720 +// 0.569713 +0x388f +// 0.564358 +0x3884 diff --git a/Testing/Patterns/DSP/Support/SupportF16/Weights6_f16.txt b/Testing/Patterns/DSP/Support/SupportF16/Weights6_f16.txt index 37e2e382..0f5e13da 100755 --- a/Testing/Patterns/DSP/Support/SupportF16/Weights6_f16.txt +++ b/Testing/Patterns/DSP/Support/SupportF16/Weights6_f16.txt @@ -1,102 +1,102 @@ H 50 -// 0.873606 -0x3afd -// 0.005892 -0x1e09 -// 0.669526 -0x395b -// 0.198146 -0x3257 -// 0.032904 -0x2836 -// 0.829321 -0x3aa2 -// 0.044536 -0x29b3 -// 0.842708 -0x3abe -// 0.981319 -0x3bda -// 0.274000 -0x3462 -// 0.509194 -0x3813 -// 0.994965 -0x3bf6 -// 0.572766 -0x3895 -// 0.557596 -0x3876 -// 0.306704 -0x34e8 -// 0.983822 -0x3bdf -// 0.145555 -0x30a8 -// 0.539088 -0x3850 -// 0.527526 -0x3838 -// 0.501409 -0x3803 -// 0.249480 -0x33fc -// 0.273503 -0x3460 -// 0.567660 -0x388b -// 0.741561 -0x39ef -// 0.458026 -0x3754 -// 0.239162 -0x33a7 -// 0.731739 -0x39db -// 0.662458 -0x394d -// 0.024779 -0x2658 -// 0.086811 -0x2d8e -// 0.660991 -0x394a -// 0.834424 -0x3aad -// 0.163672 -0x313d -// 0.120432 -0x2fb5 -// 0.593488 -0x38bf -// 0.973784 -0x3bca -// 0.167473 -0x315c -// 0.858171 -0x3ade -// 0.986637 -0x3be5 -// 0.223556 -0x3327 -// 0.382377 -0x361e -// 0.757667 -0x3a10 -// 0.032219 -0x2820 -// 0.574024 -0x3898 -// 0.125286 -0x3002 -// 0.946997 -0x3b93 -// 0.942443 -0x3b8a -// 0.152563 -0x30e2 -// 0.240567 -0x33b3 -// 0.160261 -0x3121 +// 0.305271 +0x34e2 +// 0.032698 +0x282f +// 0.266409 +0x3443 +// 0.617540 +0x38f1 +// 0.152922 +0x30e5 +// 0.774995 +0x3a33 +// 0.848550 +0x3aca +// 0.523429 +0x3830 +// 0.116035 +0x2f6d +// 0.649223 +0x3932 +// 0.246758 +0x33e5 +// 0.826661 +0x3a9d +// 0.967839 +0x3bbe +// 0.283239 +0x3488 +// 0.847203 +0x3ac7 +// 0.720735 +0x39c4 +// 0.477014 +0x37a2 +// 0.633504 +0x3911 +// 0.596646 +0x38c6 +// 0.832505 +0x3aa9 +// 0.135769 +0x3058 +// 0.607454 +0x38dc +// 0.914942 +0x3b52 +// 0.474766 +0x3799 +// 0.781271 +0x3a40 +// 0.084347 +0x2d66 +// 0.038805 +0x28f8 +// 0.055045 +0x2b0c +// 0.334330 +0x3559 +// 0.331283 +0x354d +// 0.595124 +0x38c3 +// 0.443636 +0x3719 +// 0.612722 +0x38e7 +// 0.152437 +0x30e1 +// 0.311869 +0x34fd +// 0.177173 +0x31ab +// 0.540216 +0x3852 +// 0.941623 +0x3b88 +// 0.666747 +0x3955 +// 0.002608 +0x1958 +// 0.092911 +0x2df2 +// 0.129786 +0x3027 +// 0.710137 +0x39ae +// 0.488728 +0x37d2 +// 0.478723 +0x37a9 +// 0.627602 +0x3905 +// 0.001795 +0x175b +// 0.299980 +0x34cd +// 0.636209 +0x3917 +// 0.102662 +0x2e92 diff --git a/Testing/Source/Tests/BIQUADF16.cpp b/Testing/Source/Tests/BIQUADF16.cpp index b8bd496d..3c72cfd5 100755 --- a/Testing/Source/Tests/BIQUADF16.cpp +++ b/Testing/Source/Tests/BIQUADF16.cpp @@ -10,8 +10,8 @@ Reference patterns are generated with a double precision computation. */ -#define REL_ERROR (3.0e-3) -#define ABS_ERROR (3.5e-2) +#define REL_ERROR (5.0e-2) +#define ABS_ERROR (1.0e-1) void BIQUADF16::test_biquad_cascade_df1_ref() { diff --git a/Testing/Source/Tests/DistanceTestsF16.cpp b/Testing/Source/Tests/DistanceTestsF16.cpp index 12ec71e0..8c9df7d6 100755 --- a/Testing/Source/Tests/DistanceTestsF16.cpp +++ b/Testing/Source/Tests/DistanceTestsF16.cpp @@ -3,7 +3,7 @@ #include "Error.h" #include "Test.h" -#define REL_ERROR (2e-3) +#define REL_ERROR (5e-3) #define REL_JS_ERROR (3e-2) diff --git a/Testing/Source/Tests/SupportTestsF16.cpp b/Testing/Source/Tests/SupportTestsF16.cpp index c05b4203..ce7222e8 100755 --- a/Testing/Source/Tests/SupportTestsF16.cpp +++ b/Testing/Source/Tests/SupportTestsF16.cpp @@ -7,8 +7,8 @@ #define SNR_THRESHOLD 120 #define REL_ERROR (1.0e-5) -#define ABS_WEIGHTEDSUM_ERROR (5.0e-2) -#define REL_WEIGHTEDSUM_ERROR (5.0e-2) +#define ABS_WEIGHTEDSUM_ERROR (1.0e-1) +#define REL_WEIGHTEDSUM_ERROR (5.0e-3) #define ABS_ERROR_F32 (1.0e-3) #define REL_ERROR_F32 (1.0e-3) @@ -28,7 +28,7 @@ void SupportTestsF16::test_weighted_sum_f16() *outp=arm_weighted_sum_f16(inp, coefsp,this->nbSamples); - + ASSERT_CLOSE_ERROR(*outp,refp[this->offset],ABS_WEIGHTEDSUM_ERROR,REL_WEIGHTEDSUM_ERROR); ASSERT_EMPTY_TAIL(output); diff --git a/Testing/bench.txt b/Testing/bench.txt index de658337..24e8e107 100755 --- a/Testing/bench.txt +++ b/Testing/bench.txt @@ -754,7 +754,7 @@ group Root { } Functions { - Cascaded BiQuad Filter DF2T_f64:test_biquad_cascade_df2T_f64 + Cascaded BiQuad Filter DF2T:test_biquad_cascade_df2T_f64 } -> PARAM1_ID } } @@ -1570,8 +1570,8 @@ group Root { Complex FFT:test_cfft_f32 -> CFFT_PARAM_ID Real FFT:test_rfft_f32 -> RFFT_PARAM_ID DCT4:test_dct4_f32 -> DCT_PARAM_ID - Radix 4 Complex FFT:test_cfft_radix4_f32 -> CFFT4_PARAM_ID - Radix 2 Complex FFT:test_cfft_radix2_f32 -> CFFT_PARAM_ID + Scalar Radix 4 Complex FFT:test_cfft_radix4_f32 -> CFFT4_PARAM_ID + Scalar Radix 2 Complex FFT:test_cfft_radix2_f32 -> CFFT_PARAM_ID } } @@ -1620,8 +1620,8 @@ group Root { Complex FFT:test_cfft_q31 -> CFFT_PARAM_ID Real FFT:test_rfft_q31 -> RFFT_PARAM_ID DCT4:test_dct4_q31 -> DCT_PARAM_ID - Radix 4 Complex FFT:test_cfft_radix4_q31 -> CFFT4_PARAM_ID - Radix 2 Complex FFT:test_cfft_radix2_q31 -> CFFT_PARAM_ID + Scalar Radix 4 Complex FFT:test_cfft_radix4_q31 -> CFFT4_PARAM_ID + Scalar Radix 2 Complex FFT:test_cfft_radix2_q31 -> CFFT_PARAM_ID } } @@ -1670,8 +1670,8 @@ group Root { Complex FFT:test_cfft_q15 -> CFFT_PARAM_ID Real FFT:test_rfft_q15 -> RFFT_PARAM_ID DCT4:test_dct4_q15 -> DCT_PARAM_ID - Radix 4 Complex FFT:test_cfft_radix4_q15 -> CFFT4_PARAM_ID - Radix 2 Complex FFT:test_cfft_radix2_q15 -> CFFT_PARAM_ID + Scalar Radix 4 Complex FFT:test_cfft_radix4_q15 -> CFFT4_PARAM_ID + Scalar Radix 2 Complex FFT:test_cfft_radix2_q15 -> CFFT_PARAM_ID } } } diff --git a/Testing/bench_f16.txt b/Testing/bench_f16.txt index d5425849..7199d1df 100755 --- a/Testing/bench_f16.txt +++ b/Testing/bench_f16.txt @@ -584,8 +584,8 @@ group Root { Functions { Complex FFT:test_cfft_f16 -> CFFT_PARAM_ID Real FFT:test_rfft_f16 -> RFFT_PARAM_ID - Radix 4 Complex FFT:test_cfft_radix4_f16 -> CFFT4_PARAM_ID - Radix 2 Complex FFT:test_cfft_radix2_f16 -> CFFT_PARAM_ID + Scalar Radix 4 Complex FFT:test_cfft_radix4_f16 -> CFFT4_PARAM_ID + Scalar Radix 2 Complex FFT:test_cfft_radix2_f16 -> CFFT_PARAM_ID } }