From b1c556034418f0ba8d0f99bc108f30c3a7f64675 Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Wed, 21 Apr 2021 09:34:59 +0200 Subject: [PATCH] CMSIS-DSP: Unrolled and DSP versions of absmin and absmax. --- Source/StatisticsFunctions/arm_absmax_f16.c | 91 ++++++++++++++++++++- Source/StatisticsFunctions/arm_absmax_f32.c | 91 ++++++++++++++++++++- Source/StatisticsFunctions/arm_absmax_q15.c | 90 ++++++++++++++++++++ Source/StatisticsFunctions/arm_absmax_q31.c | 90 +++++++++++++++++++- Source/StatisticsFunctions/arm_absmax_q7.c | 90 +++++++++++++++++++- Source/StatisticsFunctions/arm_absmin_f16.c | 91 ++++++++++++++++++++- Source/StatisticsFunctions/arm_absmin_f32.c | 90 +++++++++++++++++++- Source/StatisticsFunctions/arm_absmin_q15.c | 91 ++++++++++++++++++++- Source/StatisticsFunctions/arm_absmin_q31.c | 90 +++++++++++++++++++- Source/StatisticsFunctions/arm_absmin_q7.c | 91 ++++++++++++++++++++- Testing/CMakeLists.txt | 25 ++++-- 11 files changed, 911 insertions(+), 19 deletions(-) diff --git a/Source/StatisticsFunctions/arm_absmax_f16.c b/Source/StatisticsFunctions/arm_absmax_f16.c index d6445ee6..1f82d51f 100755 --- a/Source/StatisticsFunctions/arm_absmax_f16.c +++ b/Source/StatisticsFunctions/arm_absmax_f16.c @@ -51,7 +51,94 @@ @return none */ - +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmax_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + float16_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0.0f16) ? out : -out; \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax; \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else void arm_absmax_f16( const float16_t * pSrc, uint32_t blockSize, @@ -91,7 +178,7 @@ void arm_absmax_f16( *pResult = out; *pIndex = outIndex; } - +#endif /* defined(ARM_MATH_LOOPUNROLL) */ /** @} end of AbsMax group */ diff --git a/Source/StatisticsFunctions/arm_absmax_f32.c b/Source/StatisticsFunctions/arm_absmax_f32.c index 18cb2972..3367f730 100755 --- a/Source/StatisticsFunctions/arm_absmax_f32.c +++ b/Source/StatisticsFunctions/arm_absmax_f32.c @@ -55,7 +55,94 @@ @return none */ - +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmax_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + float32_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0.0f) ? out : -out; \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else void arm_absmax_f32( const float32_t * pSrc, uint32_t blockSize, @@ -98,7 +185,7 @@ void arm_absmax_f32( *pResult = out; *pIndex = outIndex; } - +#endif /* defined(ARM_MATH_LOOPUNROLL) */ /** @} end of AbsMax group */ diff --git a/Source/StatisticsFunctions/arm_absmax_q15.c b/Source/StatisticsFunctions/arm_absmax_q15.c index a807590e..8473fb67 100755 --- a/Source/StatisticsFunctions/arm_absmax_q15.c +++ b/Source/StatisticsFunctions/arm_absmax_q15.c @@ -44,6 +44,94 @@ @return none */ +#if defined(ARM_MATH_DSP) +void arm_absmax_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + q15_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q15_t)__QSUB16(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else void arm_absmax_q15( const q15_t * pSrc, uint32_t blockSize, @@ -84,6 +172,8 @@ void arm_absmax_q15( *pResult = out; *pIndex = outIndex; } +#endif /* defined(ARM_MATH_DSP) */ + /** @} end of AbsMax group */ diff --git a/Source/StatisticsFunctions/arm_absmax_q31.c b/Source/StatisticsFunctions/arm_absmax_q31.c index 3f43c4a4..f8a99fae 100755 --- a/Source/StatisticsFunctions/arm_absmax_q31.c +++ b/Source/StatisticsFunctions/arm_absmax_q31.c @@ -44,6 +44,94 @@ @return none */ +#if defined(ARM_MATH_DSP) +void arm_absmax_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + q31_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q31_t)__QSUB(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else void arm_absmax_q31( const q31_t * pSrc, uint32_t blockSize, @@ -85,7 +173,7 @@ void arm_absmax_q31( *pResult = out; *pIndex = outIndex; } - +#endif /* defined(ARM_MATH_DSP) */ /** @} end of AbsMax group */ diff --git a/Source/StatisticsFunctions/arm_absmax_q7.c b/Source/StatisticsFunctions/arm_absmax_q7.c index 74b8deb3..acf0ebda 100755 --- a/Source/StatisticsFunctions/arm_absmax_q7.c +++ b/Source/StatisticsFunctions/arm_absmax_q7.c @@ -43,7 +43,94 @@ @param[out] pIndex index of maximum value returned here @return none */ - +#if defined(ARM_MATH_DSP) +void arm_absmax_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + q7_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else void arm_absmax_q7( const q7_t * pSrc, uint32_t blockSize, @@ -85,6 +172,7 @@ void arm_absmax_q7( *pResult = out; *pIndex = outIndex; } +#endif /* defined(ARM_MATH_DSP) */ /** @} end of AbsMax group diff --git a/Source/StatisticsFunctions/arm_absmin_f16.c b/Source/StatisticsFunctions/arm_absmin_f16.c index 8b883315..84200cb4 100755 --- a/Source/StatisticsFunctions/arm_absmin_f16.c +++ b/Source/StatisticsFunctions/arm_absmin_f16.c @@ -53,7 +53,94 @@ */ - +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmin_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + float16_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0.0f16) ? out : -out; \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin; \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f16) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else void arm_absmin_f16( const float16_t * pSrc, uint32_t blockSize, @@ -93,7 +180,7 @@ void arm_absmin_f16( *pResult = out; *pIndex = outIndex; } - +#endif /* defined(ARM_MATH_LOOPUNROLL) */ /** @} end of AbsMin group */ diff --git a/Source/StatisticsFunctions/arm_absmin_f32.c b/Source/StatisticsFunctions/arm_absmin_f32.c index 51fc438d..4a2ec2a6 100755 --- a/Source/StatisticsFunctions/arm_absmin_f32.c +++ b/Source/StatisticsFunctions/arm_absmin_f32.c @@ -57,7 +57,94 @@ @return none */ - +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmin_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + float32_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0.0f) ? out : -out; \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else void arm_absmin_f32( const float32_t * pSrc, uint32_t blockSize, @@ -98,6 +185,7 @@ void arm_absmin_f32( *pIndex = outIndex; } +#endif /* defined(ARM_MATH_LOOPUNROLL) */ /** @} end of AbsMin group */ diff --git a/Source/StatisticsFunctions/arm_absmin_q15.c b/Source/StatisticsFunctions/arm_absmin_q15.c index bc174f37..b6b1fee5 100755 --- a/Source/StatisticsFunctions/arm_absmin_q15.c +++ b/Source/StatisticsFunctions/arm_absmin_q15.c @@ -37,7 +37,7 @@ */ /** - @brief Minimum value of absolute values a Q15 vector. + @brief Minimum value of absolute values of a Q15 vector. @param[in] pSrc points to the input vector @param[in] blockSize number of samples in input vector @param[out] pResult minimum value returned here @@ -45,6 +45,94 @@ @return none */ +#if defined(ARM_MATH_DSP) +void arm_absmin_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + q15_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q15_t)__QSUB16(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else void arm_absmin_q15( const q15_t * pSrc, uint32_t blockSize, @@ -86,6 +174,7 @@ void arm_absmin_q15( *pResult = out; *pIndex = outIndex; } +#endif /* defined(ARM_MATH_DSP) */ /** @} end of AbsMin group diff --git a/Source/StatisticsFunctions/arm_absmin_q31.c b/Source/StatisticsFunctions/arm_absmin_q31.c index f7056975..c02523e3 100755 --- a/Source/StatisticsFunctions/arm_absmin_q31.c +++ b/Source/StatisticsFunctions/arm_absmin_q31.c @@ -45,6 +45,94 @@ @return none */ +#if defined(ARM_MATH_DSP) +void arm_absmin_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + q31_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q31_t)__QSUB(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else void arm_absmin_q31( const q31_t * pSrc, uint32_t blockSize, @@ -85,7 +173,7 @@ void arm_absmin_q31( *pResult = out; *pIndex = outIndex; } - +#endif /* defined(ARM_MATH_DSP) */ /** @} end of AbsMin group */ diff --git a/Source/StatisticsFunctions/arm_absmin_q7.c b/Source/StatisticsFunctions/arm_absmin_q7.c index 11ef6563..a0f1fb73 100755 --- a/Source/StatisticsFunctions/arm_absmin_q7.c +++ b/Source/StatisticsFunctions/arm_absmin_q7.c @@ -44,7 +44,94 @@ @param[out] pIndex index of minimum value returned here @return none */ - +#if defined(ARM_MATH_DSP) +void arm_absmin_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + q7_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else void arm_absmin_q7( const q7_t * pSrc, uint32_t blockSize, @@ -85,7 +172,7 @@ void arm_absmin_q7( *pResult = out; *pIndex = outIndex; } - +#endif /* defined(ARM_MATH_DSP) */ /** @} end of AbsMin group */ diff --git a/Testing/CMakeLists.txt b/Testing/CMakeLists.txt index d56e5b0a..1826b954 100644 --- a/Testing/CMakeLists.txt +++ b/Testing/CMakeLists.txt @@ -82,6 +82,8 @@ option(REFLIB "Use already built reference lib" OFF) option(EMBEDDED "Embedded Mode" ON) option(FLOAT16TESTS "Float16 tests" OFF) +option(ALLTESTS "All tests including Float16 tests" OFF) + option(MICROBENCH "Micro benchmarks" OFF) option(EXTERNAL "External benchmarks or tests" OFF) option(CACHEANALYSIS "Build with cache analysis mode enabled" OFF) @@ -211,7 +213,7 @@ set (NNSRC target_include_directories(TestingLib PRIVATE Include/Benchmarks) endif() - if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL))) + if ((NOT ARMAC5) AND (FLOAT16TESTS OR ALLTESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL))) set(TESTSRC16 Source/Benchmarks/BasicMathsBenchmarksF16.cpp Source/Benchmarks/ComplexMathsBenchmarksF16.cpp Source/Benchmarks/BayesF16.cpp @@ -376,7 +378,7 @@ set(TESTSRC ) -if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL))) +if ((NOT ARMAC5) AND (FLOAT16TESTS OR ALLTESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL))) set(TESTSRC16 Source/Tests/BasicTestsF16.cpp Source/Tests/ComplexTestsF16.cpp @@ -427,13 +429,24 @@ endif() disableOptimization(TestingLib) disableOptimization(FrameworkLib) -## Only build f16 version when running float16tests -if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL))) -target_sources(TestingLib PRIVATE ${TESTSRC16}) +## Only build f16 version when running float16tests or all tests +## and float16 are supported +if (ARMAC5) + target_sources(TestingLib PRIVATE ${TESTSRC}) +elseif ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL)) + if (ALLTESTS) + target_sources(TestingLib PRIVATE ${TESTSRC16}) + target_sources(TestingLib PRIVATE ${TESTSRC}) + elseif (FLOAT16TESTS) + target_sources(TestingLib PRIVATE ${TESTSRC16}) + else() + target_sources(TestingLib PRIVATE ${TESTSRC}) + endif() else() -target_sources(TestingLib PRIVATE ${TESTSRC}) + target_sources(TestingLib PRIVATE ${TESTSRC}) endif() + if(NN) target_sources(TestingLib PRIVATE ${NNSRC}) endif()