diff --git a/Source/StatisticsFunctions/arm_accumulate_f16.c b/Source/StatisticsFunctions/arm_accumulate_f16.c index 9ed7c66c..e2a778c4 100755 --- a/Source/StatisticsFunctions/arm_accumulate_f16.c +++ b/Source/StatisticsFunctions/arm_accumulate_f16.c @@ -60,36 +60,6 @@ @param[out] pResult sum value returned here. @return none */ -#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" - -void arm_accumulate_f16( - const float16_t * pSrc, - uint32_t blockSize, - float16_t * pResult) -{ - int32_t blkCnt; /* loop counters */ - f16x8_t vecSrc; - f16x8_t sumVec = vdupq_n_f16(0.0f16); - - blkCnt = blockSize; - do { - mve_pred16_t p = vctp16q(blkCnt); - - vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p); - sumVec = vaddq_m_f16(sumVec, sumVec, vecSrc, p); - - blkCnt -= 8; - pSrc += 8; - } - while (blkCnt > 0); - - *pResult = vecAddAcrossF16Mve(sumVec); -} - - -#else void arm_accumulate_f16( const float16_t * pSrc, @@ -148,5 +118,4 @@ void arm_accumulate_f16( @} end of Accumulation group */ -#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ diff --git a/Source/StatisticsFunctions/arm_accumulate_f32.c b/Source/StatisticsFunctions/arm_accumulate_f32.c index f967aac5..7fce8f5b 100644 --- a/Source/StatisticsFunctions/arm_accumulate_f32.c +++ b/Source/StatisticsFunctions/arm_accumulate_f32.c @@ -45,50 +45,8 @@ @param[out] pResult sum value returned here. @return none */ -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) -#include "arm_helium_utils.h" -void arm_accumulate_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - f32x4_t vecSrc; - f32x4_t sumVec = vdupq_n_f32(0.0f); - float32_t sum = 0.0f; - - /* Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - while (blkCnt > 0U) - { - vecSrc = vldrwq_f32(pSrc); - sumVec = vaddq_f32(sumVec, vecSrc); - - blkCnt --; - pSrc += 4; - } - - sum = vecAddAcrossF32Mve(sumVec); - - /* Tail */ - blkCnt = blockSize & 0x3; - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - /* Decrement loop counter */ - blkCnt--; - } - - *pResult = sum; -} - - -#else #if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_accumulate_f32( const float32_t * pSrc, @@ -191,7 +149,7 @@ void arm_accumulate_f32( *pResult = sum ; } #endif /* #if defined(ARM_MATH_NEON) */ -#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + /** @} end of Accumulation group