diff --git a/Source/TransformFunctions/arm_cfft_f16.c b/Source/TransformFunctions/arm_cfft_f16.c index 26dae5ff..edfc0b4a 100755 --- a/Source/TransformFunctions/arm_cfft_f16.c +++ b/Source/TransformFunctions/arm_cfft_f16.c @@ -108,28 +108,26 @@ static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float1 n2 >>= 2u; for (int k = fftLen / 4u; k > 1; k >>= 2) { + float16_t const *p_rearranged_twiddle_tab_stride1 = + &S->rearranged_twiddle_stride1[ + S->rearranged_twiddle_tab_stride1_arr[stage]]; + float16_t const *p_rearranged_twiddle_tab_stride2 = + &S->rearranged_twiddle_stride2[ + S->rearranged_twiddle_tab_stride2_arr[stage]]; + float16_t const *p_rearranged_twiddle_tab_stride3 = + &S->rearranged_twiddle_stride3[ + S->rearranged_twiddle_tab_stride3_arr[stage]]; + float16_t * pBase = pSrc; for (int i = 0; i < iter; i++) { - float16_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - float16_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - float16_t const *p_rearranged_twiddle_tab_stride3 = - &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - float16_t const *pW1, *pW2, *pW3; - float16_t *inA = pSrc + CMPLX_DIM * i * n1; - float16_t *inB = inA + n2 * CMPLX_DIM; - float16_t *inC = inB + n2 * CMPLX_DIM; - float16_t *inD = inC + n2 * CMPLX_DIM; - f16x8_t vecW; - - - pW1 = p_rearranged_twiddle_tab_stride1; - pW2 = p_rearranged_twiddle_tab_stride2; - pW3 = p_rearranged_twiddle_tab_stride3; + float16_t *inA = pBase; + float16_t *inB = inA + n2 * CMPLX_DIM; + float16_t *inC = inB + n2 * CMPLX_DIM; + float16_t *inD = inC + n2 * CMPLX_DIM; + float16_t const *pW1 = p_rearranged_twiddle_tab_stride1; + float16_t const *pW2 = p_rearranged_twiddle_tab_stride2; + float16_t const *pW3 = p_rearranged_twiddle_tab_stride3; + f16x8_t vecW; blkCnt = n2 / 4; /* @@ -198,6 +196,7 @@ static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float1 blkCnt--; } + pBase += CMPLX_DIM * n1; } n1 = n2; n2 >>= 2u; @@ -300,7 +299,6 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 * f16x8_t vecTmp0, vecTmp1; f16x8_t vecSum0, vecDiff0, vecSum1, vecDiff1; f16x8_t vecA, vecB, vecC, vecD; - f16x8_t vecW; uint32_t blkCnt; uint32_t n1, n2; uint32_t stage = 0; @@ -317,26 +315,27 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 * n2 >>= 2u; for (int k = fftLen / 4; k > 1; k >>= 2) { + float16_t const *p_rearranged_twiddle_tab_stride1 = + &S->rearranged_twiddle_stride1[ + S->rearranged_twiddle_tab_stride1_arr[stage]]; + float16_t const *p_rearranged_twiddle_tab_stride2 = + &S->rearranged_twiddle_stride2[ + S->rearranged_twiddle_tab_stride2_arr[stage]]; + float16_t const *p_rearranged_twiddle_tab_stride3 = + &S->rearranged_twiddle_stride3[ + S->rearranged_twiddle_tab_stride3_arr[stage]]; + + float16_t * pBase = pSrc; for (int i = 0; i < iter; i++) { - float16_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - float16_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - float16_t const *p_rearranged_twiddle_tab_stride3 = - &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - float16_t const *pW1, *pW2, *pW3; - float16_t *inA = pSrc + CMPLX_DIM * i * n1; - float16_t *inB = inA + n2 * CMPLX_DIM; - float16_t *inC = inB + n2 * CMPLX_DIM; - float16_t *inD = inC + n2 * CMPLX_DIM; - - pW1 = p_rearranged_twiddle_tab_stride1; - pW2 = p_rearranged_twiddle_tab_stride2; - pW3 = p_rearranged_twiddle_tab_stride3; + float16_t *inA = pBase; + float16_t *inB = inA + n2 * CMPLX_DIM; + float16_t *inC = inB + n2 * CMPLX_DIM; + float16_t *inD = inC + n2 * CMPLX_DIM; + float16_t const *pW1 = p_rearranged_twiddle_tab_stride1; + float16_t const *pW2 = p_rearranged_twiddle_tab_stride2; + float16_t const *pW3 = p_rearranged_twiddle_tab_stride3; + f16x8_t vecW; blkCnt = n2 / 4; /* @@ -404,6 +403,7 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 * blkCnt--; } + pBase += CMPLX_DIM * n1; } n1 = n2; n2 >>= 2u; diff --git a/Source/TransformFunctions/arm_cfft_f32.c b/Source/TransformFunctions/arm_cfft_f32.c index 859e1c6f..0b33e8ea 100755 --- a/Source/TransformFunctions/arm_cfft_f32.c +++ b/Source/TransformFunctions/arm_cfft_f32.c @@ -91,13 +91,13 @@ static float32_t arm_inverse_fft_length_f32(uint16_t fftLen) static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float32_t * pSrc, uint32_t fftLen) { - f32x4_t vecTmp0, vecTmp1; - f32x4_t vecSum0, vecDiff0, vecSum1, vecDiff1; - f32x4_t vecA, vecB, vecC, vecD; - uint32_t blkCnt; - uint32_t n1, n2; - uint32_t stage = 0; - int32_t iter = 1; + f32x4_t vecTmp0, vecTmp1; + f32x4_t vecSum0, vecDiff0, vecSum1, vecDiff1; + f32x4_t vecA, vecB, vecC, vecD; + uint32_t blkCnt; + uint32_t n1, n2; + uint32_t stage = 0; + int32_t iter = 1; static const int32_t strides[4] = { (0 - 16) * (int32_t)sizeof(q31_t *), (1 - 16) * (int32_t)sizeof(q31_t *), @@ -110,29 +110,28 @@ static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float3 n2 >>= 2u; for (int k = fftLen / 4u; k > 1; k >>= 2) { + float32_t const *p_rearranged_twiddle_tab_stride1 = + &S->rearranged_twiddle_stride1[ + S->rearranged_twiddle_tab_stride1_arr[stage]]; + float32_t const *p_rearranged_twiddle_tab_stride2 = + &S->rearranged_twiddle_stride2[ + S->rearranged_twiddle_tab_stride2_arr[stage]]; + float32_t const *p_rearranged_twiddle_tab_stride3 = + &S->rearranged_twiddle_stride3[ + S->rearranged_twiddle_tab_stride3_arr[stage]]; + + float32_t * pBase = pSrc; for (int i = 0; i < iter; i++) { - float32_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - float32_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - float32_t const *p_rearranged_twiddle_tab_stride3 = - &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - float32_t const *pW1, *pW2, *pW3; - float32_t *inA = pSrc + CMPLX_DIM * i * n1; - float32_t *inB = inA + n2 * CMPLX_DIM; - float32_t *inC = inB + n2 * CMPLX_DIM; - float32_t *inD = inC + n2 * CMPLX_DIM; + float32_t *inA = pBase; + float32_t *inB = inA + n2 * CMPLX_DIM; + float32_t *inC = inB + n2 * CMPLX_DIM; + float32_t *inD = inC + n2 * CMPLX_DIM; + float32_t const *pW1 = p_rearranged_twiddle_tab_stride1; + float32_t const *pW2 = p_rearranged_twiddle_tab_stride2; + float32_t const *pW3 = p_rearranged_twiddle_tab_stride3; f32x4_t vecW; - - pW1 = p_rearranged_twiddle_tab_stride1; - pW2 = p_rearranged_twiddle_tab_stride2; - pW3 = p_rearranged_twiddle_tab_stride3; - blkCnt = n2 / 2; /* * load 2 f32 complex pair @@ -200,6 +199,7 @@ static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float3 blkCnt--; } + pBase += CMPLX_DIM * n1; } n1 = n2; n2 >>= 2u; @@ -302,7 +302,6 @@ static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 * f32x4_t vecTmp0, vecTmp1; f32x4_t vecSum0, vecDiff0, vecSum1, vecDiff1; f32x4_t vecA, vecB, vecC, vecD; - f32x4_t vecW; uint32_t blkCnt; uint32_t n1, n2; uint32_t stage = 0; @@ -319,26 +318,27 @@ static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 * n2 >>= 2u; for (int k = fftLen / 4; k > 1; k >>= 2) { + float32_t const *p_rearranged_twiddle_tab_stride1 = + &S->rearranged_twiddle_stride1[ + S->rearranged_twiddle_tab_stride1_arr[stage]]; + float32_t const *p_rearranged_twiddle_tab_stride2 = + &S->rearranged_twiddle_stride2[ + S->rearranged_twiddle_tab_stride2_arr[stage]]; + float32_t const *p_rearranged_twiddle_tab_stride3 = + &S->rearranged_twiddle_stride3[ + S->rearranged_twiddle_tab_stride3_arr[stage]]; + + float32_t * pBase = pSrc; for (int i = 0; i < iter; i++) { - float32_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - float32_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - float32_t const *p_rearranged_twiddle_tab_stride3 = - &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - float32_t const *pW1, *pW2, *pW3; - float32_t *inA = pSrc + CMPLX_DIM * i * n1; - float32_t *inB = inA + n2 * CMPLX_DIM; - float32_t *inC = inB + n2 * CMPLX_DIM; - float32_t *inD = inC + n2 * CMPLX_DIM; - - pW1 = p_rearranged_twiddle_tab_stride1; - pW2 = p_rearranged_twiddle_tab_stride2; - pW3 = p_rearranged_twiddle_tab_stride3; + float32_t *inA = pBase; + float32_t *inB = inA + n2 * CMPLX_DIM; + float32_t *inC = inB + n2 * CMPLX_DIM; + float32_t *inD = inC + n2 * CMPLX_DIM; + float32_t const *pW1 = p_rearranged_twiddle_tab_stride1; + float32_t const *pW2 = p_rearranged_twiddle_tab_stride2; + float32_t const *pW3 = p_rearranged_twiddle_tab_stride3; + f32x4_t vecW; blkCnt = n2 / 2; /* @@ -406,6 +406,7 @@ static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 * blkCnt--; } + pBase += CMPLX_DIM * n1; } n1 = n2; n2 >>= 2u; diff --git a/Source/TransformFunctions/arm_cfft_q15.c b/Source/TransformFunctions/arm_cfft_q15.c index 0f981f26..1ee42e0a 100644 --- a/Source/TransformFunctions/arm_cfft_q15.c +++ b/Source/TransformFunctions/arm_cfft_q15.c @@ -41,7 +41,6 @@ static void _arm_radix4_butterfly_q15_mve( q15x8_t vecTmp0, vecTmp1; q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1; q15x8_t vecA, vecB, vecC, vecD; - q15x8_t vecW; uint32_t blkCnt; uint32_t n1, n2; uint32_t stage = 0; @@ -61,25 +60,26 @@ static void _arm_radix4_butterfly_q15_mve( for (int k = fftLen / 4u; k > 1; k >>= 2u) { + q15_t const *p_rearranged_twiddle_tab_stride2 = + &S->rearranged_twiddle_stride2[ + S->rearranged_twiddle_tab_stride2_arr[stage]]; + q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ + S->rearranged_twiddle_tab_stride3_arr[stage]]; + q15_t const *p_rearranged_twiddle_tab_stride1 = + &S->rearranged_twiddle_stride1[ + S->rearranged_twiddle_tab_stride1_arr[stage]]; + + q15_t * pBase = pSrc; for (int i = 0; i < iter; i++) { - q15_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - q15_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - q15_t const *pW1, *pW2, *pW3; - q15_t *inA = pSrc + CMPLX_DIM * i * n1; + q15_t *inA = pBase; q15_t *inB = inA + n2 * CMPLX_DIM; q15_t *inC = inB + n2 * CMPLX_DIM; q15_t *inD = inC + n2 * CMPLX_DIM; - - pW1 = p_rearranged_twiddle_tab_stride1; - pW2 = p_rearranged_twiddle_tab_stride2; - pW3 = p_rearranged_twiddle_tab_stride3; + q15_t const *pW1 = p_rearranged_twiddle_tab_stride1; + q15_t const *pW2 = p_rearranged_twiddle_tab_stride2; + q15_t const *pW3 = p_rearranged_twiddle_tab_stride3; + q15x8_t vecW; blkCnt = n2 / 4; /* @@ -147,6 +147,7 @@ static void _arm_radix4_butterfly_q15_mve( blkCnt--; } + pBase += CMPLX_DIM * n1; } n1 = n2; n2 >>= 2u; @@ -276,7 +277,6 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S q15x8_t vecTmp0, vecTmp1; q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1; q15x8_t vecA, vecB, vecC, vecD; - q15x8_t vecW; uint32_t blkCnt; uint32_t n1, n2; uint32_t stage = 0; @@ -297,25 +297,27 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S for (int k = fftLen / 4u; k > 1; k >>= 2u) { + q15_t const *p_rearranged_twiddle_tab_stride2 = + &S->rearranged_twiddle_stride2[ + S->rearranged_twiddle_tab_stride2_arr[stage]]; + q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ + S->rearranged_twiddle_tab_stride3_arr[stage]]; + q15_t const *p_rearranged_twiddle_tab_stride1 = + &S->rearranged_twiddle_stride1[ + S->rearranged_twiddle_tab_stride1_arr[stage]]; + + q15_t * pBase = pSrc; for (int i = 0; i < iter; i++) { - q15_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - q15_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - q15_t const *pW1, *pW2, *pW3; - q15_t *inA = pSrc + CMPLX_DIM * i * n1; + q15_t *inA = pBase; q15_t *inB = inA + n2 * CMPLX_DIM; q15_t *inC = inB + n2 * CMPLX_DIM; q15_t *inD = inC + n2 * CMPLX_DIM; + q15_t const *pW1 = p_rearranged_twiddle_tab_stride1; + q15_t const *pW2 = p_rearranged_twiddle_tab_stride2; + q15_t const *pW3 = p_rearranged_twiddle_tab_stride3; + q15x8_t vecW; - pW1 = p_rearranged_twiddle_tab_stride1; - pW2 = p_rearranged_twiddle_tab_stride2; - pW3 = p_rearranged_twiddle_tab_stride3; blkCnt = n2 / 4; /* @@ -382,6 +384,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S blkCnt--; } + pBase += CMPLX_DIM * n1; } n1 = n2; n2 >>= 2u; diff --git a/Source/TransformFunctions/arm_cfft_q31.c b/Source/TransformFunctions/arm_cfft_q31.c index bfa88c25..78ce5053 100644 --- a/Source/TransformFunctions/arm_cfft_q31.c +++ b/Source/TransformFunctions/arm_cfft_q31.c @@ -43,7 +43,6 @@ static void _arm_radix4_butterfly_q31_mve( q31x4_t vecTmp0, vecTmp1; q31x4_t vecSum0, vecDiff0, vecSum1, vecDiff1; q31x4_t vecA, vecB, vecC, vecD; - q31x4_t vecW; uint32_t blkCnt; uint32_t n1, n2; uint32_t stage = 0; @@ -64,25 +63,27 @@ static void _arm_radix4_butterfly_q31_mve( for (int k = fftLen / 4u; k > 1; k >>= 2u) { + q31_t const *p_rearranged_twiddle_tab_stride2 = + &S->rearranged_twiddle_stride2[ + S->rearranged_twiddle_tab_stride2_arr[stage]]; + q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ + S->rearranged_twiddle_tab_stride3_arr[stage]]; + q31_t const *p_rearranged_twiddle_tab_stride1 = + &S->rearranged_twiddle_stride1[ + S->rearranged_twiddle_tab_stride1_arr[stage]]; + + q31_t * pBase = pSrc; for (int i = 0; i < iter; i++) { - q31_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - q31_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - q31_t const *pW1, *pW2, *pW3; - q31_t *inA = pSrc + CMPLX_DIM * i * n1; + q31_t *inA = pBase; q31_t *inB = inA + n2 * CMPLX_DIM; q31_t *inC = inB + n2 * CMPLX_DIM; q31_t *inD = inC + n2 * CMPLX_DIM; + q31_t const *pW1 = p_rearranged_twiddle_tab_stride1; + q31_t const *pW2 = p_rearranged_twiddle_tab_stride2; + q31_t const *pW3 = p_rearranged_twiddle_tab_stride3; + q31x4_t vecW; - pW1 = p_rearranged_twiddle_tab_stride1; - pW2 = p_rearranged_twiddle_tab_stride2; - pW3 = p_rearranged_twiddle_tab_stride3; blkCnt = n2 / 2; /* @@ -149,6 +150,7 @@ static void _arm_radix4_butterfly_q31_mve( blkCnt--; } + pBase += CMPLX_DIM * n1; } n1 = n2; n2 >>= 2u; @@ -293,7 +295,6 @@ static void _arm_radix4_butterfly_inverse_q31_mve( q31x4_t vecTmp0, vecTmp1; q31x4_t vecSum0, vecDiff0, vecSum1, vecDiff1; q31x4_t vecA, vecB, vecC, vecD; - q31x4_t vecW; uint32_t blkCnt; uint32_t n1, n2; uint32_t stage = 0; @@ -313,26 +314,26 @@ static void _arm_radix4_butterfly_inverse_q31_mve( for (int k = fftLen / 4u; k > 1; k >>= 2u) { + q31_t const *p_rearranged_twiddle_tab_stride2 = + &S->rearranged_twiddle_stride2[ + S->rearranged_twiddle_tab_stride2_arr[stage]]; + q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ + S->rearranged_twiddle_tab_stride3_arr[stage]]; + q31_t const *p_rearranged_twiddle_tab_stride1 = + &S->rearranged_twiddle_stride1[ + S->rearranged_twiddle_tab_stride1_arr[stage]]; + + q31_t * pBase = pSrc; for (int i = 0; i < iter; i++) { - q31_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - q31_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - - q31_t const *pW1, *pW2, *pW3; - q31_t *inA = pSrc + CMPLX_DIM * i * n1; + q31_t *inA = pBase; q31_t *inB = inA + n2 * CMPLX_DIM; q31_t *inC = inB + n2 * CMPLX_DIM; q31_t *inD = inC + n2 * CMPLX_DIM; - - pW1 = p_rearranged_twiddle_tab_stride1; - pW2 = p_rearranged_twiddle_tab_stride2; - pW3 = p_rearranged_twiddle_tab_stride3; + q31_t const *pW1 = p_rearranged_twiddle_tab_stride1; + q31_t const *pW2 = p_rearranged_twiddle_tab_stride2; + q31_t const *pW3 = p_rearranged_twiddle_tab_stride3; + q31x4_t vecW; blkCnt = n2 / 2; /* @@ -399,6 +400,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve( blkCnt--; } + pBase += CMPLX_DIM * n1; } n1 = n2; n2 >>= 2u;