From 8967816df2b16192a435cbdfaa192138a5025810 Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Mon, 26 Apr 2021 15:05:54 +0200 Subject: [PATCH] CMSIS-DSP: Improved Helium implementation of fir q7. --- Source/FilteringFunctions/arm_fir_q7.c | 44 +++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 4 deletions(-) mode change 100644 => 100755 Source/FilteringFunctions/arm_fir_q7.c diff --git a/Source/FilteringFunctions/arm_fir_q7.c b/Source/FilteringFunctions/arm_fir_q7.c old mode 100644 new mode 100755 index 3b5d8426..e2972cdb --- a/Source/FilteringFunctions/arm_fir_q7.c +++ b/Source/FilteringFunctions/arm_fir_q7.c @@ -141,7 +141,27 @@ while (blkCnt > 0); \ } -static void arm_fir_q7_17_32_mve(const arm_fir_instance_q7 * S, + +static void arm_fir_q7_49_64_mve(const arm_fir_instance_q7 * S, + const q7_t * __restrict pSrc, + q7_t * __restrict pDst, uint32_t blockSize) +{ + #define NBTAPS 64 + FIR_Q7_MAIN_CORE(); + #undef NBTAPS +} + + +void arm_fir_q7_33_48_mve(const arm_fir_instance_q7 * S, + const q7_t * __restrict pSrc, + q7_t * __restrict pDst, uint32_t blockSize) +{ + #define NBTAPS 48 + FIR_Q7_MAIN_CORE(); + #undef NBTAPS +} + +static void arm_fir_q7_17_32_mve(const arm_fir_instance_q7 * S, const q7_t * __restrict pSrc, q7_t * __restrict pDst, uint32_t blockSize) { @@ -151,8 +171,8 @@ static void arm_fir_q7_17_32_mve(const arm_fir_instance_q7 * S, } -void arm_fir_q7_1_16_mve(const arm_fir_instance_q7 * S, - const q7_t * __restrict pSrc, +void arm_fir_q7_1_16_mve(const arm_fir_instance_q7 * S, + const q7_t * __restrict pSrc, q7_t * __restrict pDst, uint32_t blockSize) { #define NBTAPS 16 @@ -196,6 +216,22 @@ void arm_fir_q7( arm_fir_q7_17_32_mve(S, pSrc, pDst, blockSize); return; } + else if (numTaps <= 48) + { + /* + * [33 to 48 taps] specialized routine + */ + arm_fir_q7_33_48_mve(S, pSrc, pDst, blockSize); + return; + } + else if (numTaps <= 64) + { + /* + * [49 to 64 taps] specialized routine + */ + arm_fir_q7_49_64_mve(S, pSrc, pDst, blockSize); + return; + } /* * pState points to state array which contains previous frame (numTaps - 1) samples @@ -607,7 +643,7 @@ void arm_fir_q7( { acc0 += (q15_t) * (px++) * (*(pb++)); i--; - } + } /* The result is in 2.14 format. Convert to 1.7 Then store the output in the destination buffer. */