diff --git a/Include/arm_math.h b/Include/arm_math.h index e4c96f69..e202547a 100644 --- a/Include/arm_math.h +++ b/Include/arm_math.h @@ -3113,8 +3113,20 @@ void arm_mat_init_f32( const q15_t *pTwiddle; /**< points to the Twiddle factor table. */ const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ uint16_t bitRevLength; /**< bit reversal table length. */ +#if defined(ARM_MATH_MVEI) + const uint32_t *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */ \ + const uint32_t *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */ \ + const uint32_t *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */ \ + const q15_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */ \ + const q15_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */ \ + const q15_t *rearranged_twiddle_stride3; +#endif } arm_cfft_instance_q15; +arm_status arm_cfft_init_q15( + arm_cfft_instance_q15 * S, + uint16_t fftLen); + void arm_cfft_q15( const arm_cfft_instance_q15 * S, q15_t * p1, @@ -3209,7 +3221,11 @@ void arm_cfft_q31( uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ const q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ const q15_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ +#if defined(ARM_MATH_MVEI) + arm_cfft_instance_q15 cfftInst; +#else const arm_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. 
*/ +#endif } arm_rfft_instance_q15; arm_status arm_rfft_init_q15( diff --git a/PythonWrapper/cmsisdsp_pkg/src/fftinit.c b/PythonWrapper/cmsisdsp_pkg/src/fftinit.c index 33dcda39..1ba4cf18 100644 --- a/PythonWrapper/cmsisdsp_pkg/src/fftinit.c +++ b/PythonWrapper/cmsisdsp_pkg/src/fftinit.c @@ -115,76 +115,3 @@ arm_status arm_cfft_init_f64( } -arm_status arm_cfft_init_q15( - arm_cfft_instance_q15 * S, - uint16_t fftLen) -{ - /* Initialise the default arm status */ - arm_status status = ARM_MATH_SUCCESS; - - /* Initialise the FFT length */ - S->fftLen = fftLen; - - /* Initialise the Twiddle coefficient pointer */ - S->pTwiddle = (float32_t *)twiddleCoef_4096; - - - /* Initializations of Instance structure depending on the FFT length */ - switch (S->fftLen) { - /* Initializations of structure parameters for 4096 point FFT */ - case 4096U: - /* Initialise the bit reversal table modifier */ - FFTFXTINIT(q15,4096); - break; - - /* Initializations of structure parameters for 2048 point FFT */ - case 2048U: - /* Initialise the bit reversal table modifier */ - FFTFXTINIT(q15,2048); - - break; - - /* Initializations of structure parameters for 1024 point FFT */ - case 1024U: - /* Initialise the bit reversal table modifier */ - FFTFXTINIT(q15,1024); - - break; - - /* Initializations of structure parameters for 512 point FFT */ - case 512U: - /* Initialise the bit reversal table modifier */ - FFTFXTINIT(q15,512); - break; - - case 256U: - FFTFXTINIT(q15,256); - break; - - case 128U: - FFTFXTINIT(q15,128); - break; - - case 64U: - FFTFXTINIT(q15,64); - break; - - case 32U: - FFTFXTINIT(q15,32); - break; - - case 16U: - /* Initializations of structure parameters for 16 point FFT */ - FFTFXTINIT(q15,16); - break; - - - default: - /* Reporting argument error if fftSize is not valid value */ - status = ARM_MATH_ARGUMENT_ERROR; - break; - } - - - return (status); -} diff --git a/Source/CommonTables/arm_const_structs.c b/Source/CommonTables/arm_const_structs.c index daaddfba..166e8cef 
100644 --- a/Source/CommonTables/arm_const_structs.c +++ b/Source/CommonTables/arm_const_structs.c @@ -229,7 +229,6 @@ const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096 = { }; #endif -#endif /* !defined(ARM_MATH_MVEI) */ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16)) const arm_cfft_instance_q15 arm_cfft_sR_q15_len16 = { @@ -285,6 +284,8 @@ const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096 = { }; #endif +#endif /* !defined(ARM_MATH_MVEI) */ + /* Structure for real-value inputs */ /* Double precision strucs */ @@ -545,7 +546,6 @@ const arm_rfft_instance_q31 arm_rfft_sR_q31_len8192 = { }; #endif -#endif /* !defined(ARM_MATH_MVEI) */ /* q15_t */ @@ -657,4 +657,7 @@ const arm_rfft_instance_q15 arm_rfft_sR_q15_len8192 = { }; #endif +#endif /* !defined(ARM_MATH_MVEI) */ + + #endif diff --git a/Source/TransformFunctions/CMakeLists.txt b/Source/TransformFunctions/CMakeLists.txt index d7e020c0..42f16cf7 100644 --- a/Source/TransformFunctions/CMakeLists.txt +++ b/Source/TransformFunctions/CMakeLists.txt @@ -38,6 +38,7 @@ if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q15_16 OR CFFT_Q15_32 OR CFFT_Q15_64 OR CF target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_q15.c) target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c) target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c) +target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q15.c) endif() if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q31_16 OR CFFT_Q31_32 OR CFFT_Q31_64 OR CFFT_Q31_128 OR CFFT_Q31_256 OR CFFT_Q31_512 @@ -82,6 +83,7 @@ target_sources(CMSISDSPTransform PRIVATE arm_dct4_q15.c) target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c) target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c) target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c) +target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q15.c) target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q15.c) 
target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c) endif() @@ -117,6 +119,7 @@ if (NOT CONFIGTABLE OR ALLFFT OR RFFT_Q15_32 OR RFFT_Q15_64 OR RFFT_Q15_128 OR R target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c) target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c) target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c) +target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q15.c) target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c) endif() diff --git a/Source/TransformFunctions/TransformFunctions.c b/Source/TransformFunctions/TransformFunctions.c index c4732162..769fc836 100644 --- a/Source/TransformFunctions/TransformFunctions.c +++ b/Source/TransformFunctions/TransformFunctions.c @@ -59,3 +59,5 @@ #include "arm_rfft_q15.c" #include "arm_rfft_q31.c" #include "arm_cfft_init_f32.c" +#include "arm_cfft_init_q31.c" +#include "arm_cfft_init_q15.c" diff --git a/Source/TransformFunctions/arm_cfft_init_q15.c b/Source/TransformFunctions/arm_cfft_init_q15.c new file mode 100755 index 00000000..99af18cd --- /dev/null +++ b/Source/TransformFunctions/arm_cfft_init_q15.c @@ -0,0 +1,356 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cfft_init_q15.c + * Description: Initialization function for cfft q15 instance + * + * $Date: 07. January 2020 + * $Revision: V1.7.0 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define FFTINIT(EXT,SIZE) \ + S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength; \ + S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable; \ + S->pTwiddle = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle; + +/** + @addtogroup ComplexFFT + @{ + */ + +/** + @brief Initialization function for the cfft q15 function + @param[in,out] S points to an instance of the Q15 CFFT structure + @param[in] fftLen fft length (number of complex samples) + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected + + @par Use of this function is mandatory only for the MVE version of the FFT. 
+ Other versions can still initialize directly the data structure using + variables declared in arm_const_structs.h + */ + +#include "arm_math.h" +#include "arm_common_tables.h" +#include "arm_const_structs.h" + +#if defined(ARM_MATH_MVEI) + +#include "arm_vec_fft.h" +#include "arm_mve_tables.h" + + +arm_status arm_cfft_radix4by2_rearrange_twiddles_q15(arm_cfft_instance_q15 *S, int twidCoefModifier) +{ + + switch (S->fftLen >> (twidCoefModifier - 1)) { + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096) + case 4096U: + S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_4096_q15; + S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_4096_q15; + + S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_4096_q15; + S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_4096_q15; + + S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_4096_q15; + S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_4096_q15; + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024) || defined(ARM_TABLE_BITREVIDX_FXT_2048) + case 1024U: + S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_1024_q15; + S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_1024_q15; + + S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_1024_q15; + S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_1024_q15; + + S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_1024_q15; + S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_1024_q15; + break; + #endif + + #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256) || defined(ARM_TABLE_BITREVIDX_FXT_512) + case 256U: + S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_256_q15; + 
S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_256_q15; + + S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_256_q15; + S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_256_q15; + + S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_256_q15; + S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_256_q15; + + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64) || defined(ARM_TABLE_BITREVIDX_FXT_128) + case 64U: + S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_64_q15; + S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_64_q15; + + S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_64_q15; + S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_64_q15; + + S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_64_q15; + S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_64_q15; + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16) || defined(ARM_TABLE_BITREVIDX_FXT_32) + case 16U: + S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_16_q15; + S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_16_q15; + + S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_16_q15; + S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_16_q15; + + S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_16_q15; + S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_16_q15; + break; +#endif + + default: + return(ARM_MATH_ARGUMENT_ERROR); + break; + /* invalid sizes already filtered */ + } + + return(ARM_MATH_SUCCESS); + +} + + + +arm_status arm_cfft_init_q15( + arm_cfft_instance_q15 * S, + uint16_t fftLen) +{ + + /* Initialise the default arm status */ + arm_status status = ARM_MATH_SUCCESS; + 
+ /* Initialise the FFT length */ + S->fftLen = fftLen; + + /* Initialise the Twiddle coefficient pointer */ + S->pTwiddle = NULL; + + + /* Initializations of Instance structure depending on the FFT length */ + switch (S->fftLen) { + /* Initializations of structure parameters for 4096 point FFT */ +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096) + case 4096U: + /* Initialise the bit reversal table modifier */ + S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH; + S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_4096; + S->pTwiddle = (q15_t *)twiddleCoef_4096_q15; + status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_2048) + /* Initializations of structure parameters for 2048 point FFT */ + case 2048U: + /* Initialise the bit reversal table modifier */ + S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH; + S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_2048; + S->pTwiddle = (q15_t *)twiddleCoef_2048_q15; + status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024) + /* Initializations of structure parameters for 1024 point FFT */ + case 1024U: + /* Initialise the bit reversal table modifier */ + S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH; + S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_1024; + S->pTwiddle = (q15_t *)twiddleCoef_1024_q15; + status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_512) + /* Initializations of structure parameters for 512 point FFT */ + case 512U: + /* Initialise the bit reversal table modifier */ + S->bitRevLength = 
ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH; + S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_512; + S->pTwiddle = (q15_t *)twiddleCoef_512_q15; + status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256) + case 256U: + S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH; + S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_256; + S->pTwiddle = (q15_t *)twiddleCoef_256_q15; + status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_128) + case 128U: + S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH; + S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_128; + S->pTwiddle = (q15_t *)twiddleCoef_128_q15; + status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64) + case 64U: + S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH; + S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_64; + S->pTwiddle = (q15_t *)twiddleCoef_64_q15; + status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_32) + case 32U: + S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH; + S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_32; + S->pTwiddle = (q15_t *)twiddleCoef_32_q15; + status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16) + case 16U: + /* Initializations of structure parameters for 16 point FFT */ + S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH; + S->pBitRevTable = (uint16_t 
*)armBitRevIndexTable_fixed_16; + S->pTwiddle = (q15_t *)twiddleCoef_16_q15; + status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1); + break; +#endif + + default: + /* Reporting argument error if fftSize is not valid value */ + status = ARM_MATH_ARGUMENT_ERROR; + break; + } + + + return (status); +} +#else +arm_status arm_cfft_init_q15( + arm_cfft_instance_q15 * S, + uint16_t fftLen) +{ + /* Initialise the default arm status */ + arm_status status = ARM_MATH_SUCCESS; + + /* Initialise the FFT length */ + S->fftLen = fftLen; + + /* Initialise the Twiddle coefficient pointer */ + S->pTwiddle = NULL; + + + /* Initializations of Instance structure depending on the FFT length */ + switch (S->fftLen) { +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096)) + /* Initializations of structure parameters for 4096 point FFT */ + case 4096U: + /* Initialise the bit reversal table modifier */ + FFTINIT(q15,4096); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048)) + /* Initializations of structure parameters for 2048 point FFT */ + case 2048U: + /* Initialise the bit reversal table modifier */ + FFTINIT(q15,2048); + + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024)) + /* Initializations of structure parameters for 1024 point FFT */ + case 1024U: + /* Initialise the bit reversal table modifier */ + FFTINIT(q15,1024); + + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FLT_512)) + /* Initializations of structure parameters for 512 point FFT */ + case 512U: + /* Initialise the bit reversal table modifier */ + FFTINIT(q15,512); + 
break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FLT_256)) + case 256U: + FFTINIT(q15,256); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FLT_128)) + case 128U: + FFTINIT(q15,128); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FLT_64)) + case 64U: + FFTINIT(q15,64); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FLT_32)) + case 32U: + FFTINIT(q15,32); + break; +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FLT_16)) + case 16U: + /* Initializations of structure parameters for 16 point FFT */ + FFTINIT(q15,16); + break; +#endif + + default: + /* Reporting argument error if fftSize is not valid value */ + status = ARM_MATH_ARGUMENT_ERROR; + break; + } + + + return (status); +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of ComplexFFT group + */ diff --git a/Source/TransformFunctions/arm_cfft_q15.c b/Source/TransformFunctions/arm_cfft_q15.c index 28ad1e18..1fa9abb0 100644 --- a/Source/TransformFunctions/arm_cfft_q15.c +++ b/Source/TransformFunctions/arm_cfft_q15.c @@ -28,6 +28,621 @@ #include "arm_math.h" +#if defined(ARM_MATH_MVEI) + +#include "arm_vec_fft.h" + + +static void arm_bitreversal_16_inpl_mve( + uint16_t *pSrc, + const uint16_t bitRevLen, + const uint16_t *pBitRevTab) + +{ + uint32_t *src = (uint32_t *)pSrc; + uint32_t blkCnt; /* loop counters */ + uint32x4_t bitRevTabOff; + uint16x8_t one = vdupq_n_u16(1); + + blkCnt = (bitRevLen / 2) / 4; + while (blkCnt > 0U) { + 
bitRevTabOff = vldrhq_u16(pBitRevTab); + pBitRevTab += 8; + + uint32x4_t bitRevOff1 = vmullbq_int_u16(bitRevTabOff, one); + uint32x4_t bitRevOff2 = vmulltq_int_u16(bitRevTabOff, one); + + bitRevOff1 = bitRevOff1 >> 3; + bitRevOff2 = bitRevOff2 >> 3; + + uint32x4_t in1 = vldrwq_gather_shifted_offset_u32(src, bitRevOff1); + uint32x4_t in2 = vldrwq_gather_shifted_offset_u32(src, bitRevOff2); + + vstrwq_scatter_shifted_offset_u32(src, bitRevOff1, in2); + vstrwq_scatter_shifted_offset_u32(src, bitRevOff2, in1); + + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = bitRevLen & 7; + if (blkCnt > 0U) { + mve_pred16_t p0 = vctp16q(blkCnt); + + bitRevTabOff = vldrhq_z_u16(pBitRevTab, p0); + + uint32x4_t bitRevOff1 = vmullbq_int_u16(bitRevTabOff, one); + uint32x4_t bitRevOff2 = vmulltq_int_u16(bitRevTabOff, one); + + bitRevOff1 = bitRevOff1 >> 3; + bitRevOff2 = bitRevOff2 >> 3; + + uint32x4_t in1 = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff1, p0); + uint32x4_t in2 = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff2, p0); + + vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff1, in2, p0); + vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff2, in1, p0); + } +} + +static void _arm_radix4_butterfly_q15_mve( + const arm_cfft_instance_q15 * S, + q15_t *pSrc, + uint32_t fftLen) +{ + q15x8_t vecTmp0, vecTmp1; + q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1; + q15x8_t vecA, vecB, vecC, vecD; + q15x8_t vecW; + uint32_t blkCnt; + uint32_t n1, n2; + uint32_t stage = 0; + int32_t iter = 1; + static const uint32_t strides[4] = { + (0 - 16) * sizeof(q15_t *), (4 - 16) * sizeof(q15_t *), + (8 - 16) * sizeof(q15_t *), (12 - 16) * sizeof(q15_t *) + }; + + /* + * Process first stages + * Each stage in middle stages provides two down scaling of the input + */ + n2 = fftLen; + n1 = n2; + n2 >>= 2u; + + for (int k = fftLen / 4u; k > 1; k >>= 2u) + { + for (int i = 0; i < iter; i++) + { + q15_t const 
*p_rearranged_twiddle_tab_stride2 = + &S->rearranged_twiddle_stride2[ + S->rearranged_twiddle_tab_stride2_arr[stage]]; + q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ + S->rearranged_twiddle_tab_stride3_arr[stage]]; + q15_t const *p_rearranged_twiddle_tab_stride1 = + &S->rearranged_twiddle_stride1[ + S->rearranged_twiddle_tab_stride1_arr[stage]]; + q15_t const *pW1, *pW2, *pW3; + q15_t *inA = pSrc + CMPLX_DIM * i * n1; + q15_t *inB = inA + n2 * CMPLX_DIM; + q15_t *inC = inB + n2 * CMPLX_DIM; + q15_t *inD = inC + n2 * CMPLX_DIM; + + pW1 = p_rearranged_twiddle_tab_stride1; + pW2 = p_rearranged_twiddle_tab_stride2; + pW3 = p_rearranged_twiddle_tab_stride3; + + blkCnt = n2 / 4; + /* + * load 4 x q15 complex pair + */ + vecA = vldrhq_s16(inA); + vecC = vldrhq_s16(inC); + while (blkCnt > 0U) + { + vecB = vldrhq_s16(inB); + vecD = vldrhq_s16(inD); + + vecSum0 = vhaddq(vecA, vecC); + vecDiff0 = vhsubq(vecA, vecC); + + vecSum1 = vhaddq(vecB, vecD); + vecDiff1 = vhsubq(vecB, vecD); + /* + * [ 1 1 1 1 ] * [ A B C D ]' .* 1 + */ + vecTmp0 = vhaddq(vecSum0, vecSum1); + vst1q(inA, vecTmp0); + inA += 8; + /* + * [ 1 -1 1 -1 ] * [ A B C D ]' + */ + vecTmp0 = vhsubq(vecSum0, vecSum1); + /* + * [ 1 -1 1 -1 ] * [ A B C D ]'.* W2 + */ + vecW = vld1q(pW2); + pW2 += 8; + vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0); + + vst1q(inB, vecTmp1); + inB += 8; + /* + * [ 1 -i -1 +i ] * [ A B C D ]' + */ + vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); + /* + * [ 1 -i -1 +i ] * [ A B C D ]'.* W1 + */ + vecW = vld1q(pW1); + pW1 += 8; + vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0); + vst1q(inC, vecTmp1); + inC += 8; + + /* + * [ 1 +i -1 -i ] * [ A B C D ]' + */ + vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); + /* + * [ 1 +i -1 -i ] * [ A B C D ]'.* W3 + */ + vecW = vld1q(pW3); + pW3 += 8; + vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0); + vst1q(inD, vecTmp1); + inD += 8; + + vecA = vldrhq_s16(inA); + vecC = vldrhq_s16(inC); + + blkCnt--; + } + 
} + n1 = n2; + n2 >>= 2u; + iter = iter << 2; + stage++; + } + + /* + * start of Last stage process + */ + uint32x4_t vecScGathAddr = *(uint32x4_t *) strides; + vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; + + /* + * load scheduling + */ + vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); + vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8); + + blkCnt = (fftLen >> 4); + while (blkCnt > 0U) + { + vecSum0 = vhaddq(vecA, vecC); + vecDiff0 = vhsubq(vecA, vecC); + + vecB = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 4); + vecD = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 12); + + vecSum1 = vhaddq(vecB, vecD); + vecDiff1 = vhsubq(vecB, vecD); + /* + * pre-load for next iteration + */ + vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); + vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8); + + vecTmp0 = vhaddq(vecSum0, vecSum1); + vstrwq_scatter_base_s32(vecScGathAddr, -64, (q15x8_t) vecTmp0); + + vecTmp0 = vhsubq(vecSum0, vecSum1); + vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (q15x8_t) vecTmp0); + + vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); + vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (q15x8_t) vecTmp0); + + vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); + vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (q15x8_t) vecTmp0); + + blkCnt--; + } + +} + +static void arm_cfft_radix4by2_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pSrc, uint32_t fftLen) +{ + uint32_t n2; + q15_t *pIn0; + q15_t *pIn1; + const q15_t *pCoef = S->pTwiddle; + uint32_t blkCnt; + q15x8_t vecIn0, vecIn1, vecSum, vecDiff; + q15x8_t vecCmplxTmp, vecTw; + q15_t const *pCoefVec; + + n2 = fftLen >> 1; + + pIn0 = pSrc; + pIn1 = pSrc + fftLen; + pCoefVec = pCoef; + + blkCnt = n2 / 4; + + while (blkCnt > 0U) + { + vecIn0 = *(q15x8_t *) pIn0; + vecIn1 = *(q15x8_t *) pIn1; + + vecIn0 = vecIn0 >> 1; + vecIn1 = vecIn1 >> 1; + vecSum = vhaddq(vecIn0, vecIn1); + vst1q(pIn0, vecSum); + pIn0 += 8; + + vecTw = vld1q(pCoefVec); + 
pCoefVec += 8; + + vecDiff = vhsubq(vecIn0, vecIn1); + vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw); + vst1q(pIn1, vecCmplxTmp); + pIn1 += 8; + + blkCnt--; + } + + _arm_radix4_butterfly_q15_mve(S, pSrc, n2); + + _arm_radix4_butterfly_q15_mve(S, pSrc + fftLen, n2); + + + pIn0 = pSrc; + blkCnt = (fftLen << 1) >> 3; + while (blkCnt > 0U) + { + vecIn0 = *(q15x8_t *) pIn0; + vecIn0 = vecIn0 << 1; + vst1q(pIn0, vecIn0); + pIn0 += 8; + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = (fftLen << 1) & 7; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp16q(blkCnt); + + vecIn0 = *(q15x8_t *) pIn0; + vecIn0 = vecIn0 << 1; + vstrhq_p(pIn0, vecIn0, p0); + } +} + +static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S,q15_t *pSrc, uint32_t fftLen) +{ + q15x8_t vecTmp0, vecTmp1; + q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1; + q15x8_t vecA, vecB, vecC, vecD; + q15x8_t vecW; + uint32_t blkCnt; + uint32_t n1, n2; + uint32_t stage = 0; + int32_t iter = 1; + static const uint32_t strides[4] = { + (0 - 16) * sizeof(q15_t *), (4 - 16) * sizeof(q15_t *), + (8 - 16) * sizeof(q15_t *), (12 - 16) * sizeof(q15_t *) + }; + + + /* + * Process first stages + * Each stage in middle stages provides two down scaling of the input + */ + n2 = fftLen; + n1 = n2; + n2 >>= 2u; + + for (int k = fftLen / 4u; k > 1; k >>= 2u) + { + for (int i = 0; i < iter; i++) + { + q15_t const *p_rearranged_twiddle_tab_stride2 = + &S->rearranged_twiddle_stride2[ + S->rearranged_twiddle_tab_stride2_arr[stage]]; + q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ + S->rearranged_twiddle_tab_stride3_arr[stage]]; + q15_t const *p_rearranged_twiddle_tab_stride1 = + &S->rearranged_twiddle_stride1[ + S->rearranged_twiddle_tab_stride1_arr[stage]]; + q15_t const *pW1, *pW2, *pW3; + q15_t *inA = pSrc + CMPLX_DIM * i * n1; + q15_t *inB = inA + n2 * CMPLX_DIM; + q15_t *inC = inB + n2 * CMPLX_DIM; + q15_t *inD = inC + n2 * 
CMPLX_DIM; + + pW1 = p_rearranged_twiddle_tab_stride1; + pW2 = p_rearranged_twiddle_tab_stride2; + pW3 = p_rearranged_twiddle_tab_stride3; + + blkCnt = n2 / 4; + /* + * load 4 x q15 complex pair + */ + vecA = vldrhq_s16(inA); + vecC = vldrhq_s16(inC); + while (blkCnt > 0U) + { + vecB = vldrhq_s16(inB); + vecD = vldrhq_s16(inD); + + vecSum0 = vhaddq(vecA, vecC); + vecDiff0 = vhsubq(vecA, vecC); + + vecSum1 = vhaddq(vecB, vecD); + vecDiff1 = vhsubq(vecB, vecD); + /* + * [ 1 1 1 1 ] * [ A B C D ]' .* 1 + */ + vecTmp0 = vhaddq(vecSum0, vecSum1); + vst1q(inA, vecTmp0); + inA += 8; + /* + * [ 1 -1 1 -1 ] * [ A B C D ]' + */ + vecTmp0 = vhsubq(vecSum0, vecSum1); + /* + * [ 1 -1 1 -1 ] * [ A B C D ]'.* W2 + */ + vecW = vld1q(pW2); + pW2 += 8; + vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW); + + vst1q(inB, vecTmp1); + inB += 8; + /* + * [ 1 -i -1 +i ] * [ A B C D ]' + */ + vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); + /* + * [ 1 -i -1 +i ] * [ A B C D ]'.* W1 + */ + vecW = vld1q(pW1); + pW1 += 8; + vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW); + vst1q(inC, vecTmp1); + inC += 8; + /* + * [ 1 +i -1 -i ] * [ A B C D ]' + */ + vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); + /* + * [ 1 +i -1 -i ] * [ A B C D ]'.* W3 + */ + vecW = vld1q(pW3); + pW3 += 8; + vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW); + vst1q(inD, vecTmp1); + inD += 8; + + vecA = vldrhq_s16(inA); + vecC = vldrhq_s16(inC); + + blkCnt--; + } + } + n1 = n2; + n2 >>= 2u; + iter = iter << 2; + stage++; + } + + /* + * start of Last stage process + */ + uint32x4_t vecScGathAddr = *(uint32x4_t *) strides; + vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; + + /* + * load scheduling + */ + vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); + vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8); + + blkCnt = (fftLen >> 4); + while (blkCnt > 0U) + { + vecSum0 = vhaddq(vecA, vecC); + vecDiff0 = vhsubq(vecA, vecC); + + vecB = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 
4); + vecD = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 12); + + vecSum1 = vhaddq(vecB, vecD); + vecDiff1 = vhsubq(vecB, vecD); + /* + * pre-load for next iteration + */ + vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); + vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8); + + vecTmp0 = vhaddq(vecSum0, vecSum1); + vstrwq_scatter_base_s32(vecScGathAddr, -64, (q15x8_t) vecTmp0); + + vecTmp0 = vhsubq(vecSum0, vecSum1); + vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (q15x8_t) vecTmp0); + + vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); + vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (q15x8_t) vecTmp0); + + vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); + vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (q15x8_t) vecTmp0); + + blkCnt--; + } +}
+
+/*
+ * Inverse complex FFT step for the lengths that arm_cfft_q15 routes here
+ * (32, 128, 512 and 2048): one radix-2 pass followed by two radix-4
+ * inverse transforms of half the length.
+ *
+ *   S       CFFT instance; only S->pTwiddle is read directly, S is also
+ *           forwarded to _arm_radix4_butterfly_inverse_q15_mve.
+ *   pSrc    interleaved q15 buffer of 2*fftLen values, processed in place.
+ *   fftLen  number of complex samples.
+ *
+ * The radix-2 pass pre-shifts both halves right by one bit and combines
+ * them with halving add / subtract (presumably for headroom - confirm
+ * against the scalar implementation); the last loop shifts every output
+ * sample left by one bit.
+ */
+static void arm_cfft_radix4by2_inverse_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pSrc, uint32_t fftLen)
+{
+    uint32_t n2;
+    q15_t *pIn0;
+    q15_t *pIn1;
+    const q15_t *pCoef = S->pTwiddle;
+
+    uint32_t blkCnt;
+    q15x8_t vecIn0, vecIn1, vecSum, vecDiff;
+    q15x8_t vecCmplxTmp, vecTw;
+    q15_t const *pCoefVec;
+
+    n2 = fftLen >> 1;
+
+    /* pIn1 starts fftLen q15 values in, i.e. at the second half of the buffer */
+    pIn0 = pSrc;
+    pIn1 = pSrc + fftLen;
+    pCoefVec = pCoef;
+
+    /* each iteration consumes 8 q15 values (4 complex samples) per half */
+    blkCnt = n2 / 4;
+
+    while (blkCnt > 0U)
+    {
+        vecIn0 = *(q15x8_t *) pIn0;
+        vecIn1 = *(q15x8_t *) pIn1;
+
+        vecIn0 = vecIn0 >> 1;
+        vecIn1 = vecIn1 >> 1;
+        vecSum = vhaddq(vecIn0, vecIn1);
+        vst1q(pIn0, vecSum);
+        pIn0 += 8;
+
+        vecTw = vld1q(pCoefVec);
+        pCoefVec += 8;
+
+        vecDiff = vhsubq(vecIn0, vecIn1);
+        /*
+         * difference times twiddle: vqrdmlsdhq fills the even lanes,
+         * vqrdmladhxq then fills the odd lanes of the same vector
+         */
+        vecCmplxTmp = vqrdmlsdhq(vuninitializedq_s16() , vecDiff, vecTw);
+        vecCmplxTmp = vqrdmladhxq(vecCmplxTmp, vecDiff, vecTw);
+        vst1q(pIn1, vecCmplxTmp);
+        pIn1 += 8;
+
+        blkCnt--;
+    }
+
+
+    _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, n2);
+
+    _arm_radix4_butterfly_inverse_q15_mve(S, pSrc + fftLen, n2);
+
+    pIn0 = pSrc;
+    blkCnt = (fftLen << 1) >> 3;
+    while (blkCnt > 0U)
+    {
+        vecIn0 = *(q15x8_t *) pIn0;
+        vecIn0 = vecIn0 << 1;
+        vst1q(pIn0, vecIn0);
+        pIn0 += 8;
+        blkCnt--;
+    }
+    /*
+     * tail
+     * (will be merged thru tail predication)
+     *
+     * blkCnt is at most 7 here, so one predicated store covers the
+     * remaining lanes. Nothing in the body updates blkCnt, so this has
+     * to be a single conditional pass and not a loop.
+     */
+    blkCnt = (fftLen << 1) & 7;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+
+        vecIn0 = *(q15x8_t *) pIn0;
+        vecIn0 = vecIn0 << 1;
+        vstrhq_p(pIn0, vecIn0, p0);
+    }
+}
+
+/**
+  @ingroup groupTransforms
+ */
+
+/**
+  @addtogroup ComplexFFT
+  @{
+ */
+
+/**
+  @brief         Processing function for Q15 complex FFT.
+  @param[in]     S               points to an instance of Q15 CFFT structure
+  @param[in,out] pSrc            points to the complex data buffer of size 2*fftLen. Processing occurs in-place
+  @param[in]     ifftFlag        flag that selects transform direction
+                   - value = 0: forward transform
+                   - value = 1: inverse transform
+  @param[in]     bitReverseFlag  flag that enables / disables bit reversal of output
+                   - value = 0: disables bit reversal of output
+                   - value = 1: enables bit reversal of output
+  @return        none
+
+  @note          Only S->fftLen values 16, 32, 64, 128, 256, 512, 1024, 2048 and 4096
+                 match a case below. For any other length no butterfly is run and
+                 only the optional bit reversal is applied.
+  @note          ifftFlag is compared with == 1U, so any value other than 1
+                 selects the forward transform.
+ */
+void arm_cfft_q15(
+  const arm_cfft_instance_q15 * S,
+        q15_t * pSrc,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag)
+{
+    uint32_t fftLen = S->fftLen;
+
+    if (ifftFlag == 1U) {
+
+        switch (fftLen) {
+        /* powers of four: pure radix-4 */
+        case 16:
+        case 64:
+        case 256:
+        case 1024:
+        case 4096:
+            _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, fftLen);
+            break;
+
+        /* twice a power of four: radix-2 pass + two radix-4 halves */
+        case 32:
+        case 128:
+        case 512:
+        case 2048:
+            arm_cfft_radix4by2_inverse_q15_mve(S, pSrc, fftLen);
+            break;
+        }
+    } else {
+        switch (fftLen) {
+        case 16:
+        case 64:
+        case 256:
+        case 1024:
+        case 4096:
+            _arm_radix4_butterfly_q15_mve(S, pSrc, fftLen);
+            break;
+
+        case 32:
+        case 128:
+        case 512:
+        case 2048:
+            arm_cfft_radix4by2_q15_mve(S, pSrc, fftLen);
+            break;
+        }
+    }
+
+
+    if (bitReverseFlag)
+    {
+
+        arm_bitreversal_16_inpl_mve((uint16_t*)pSrc, S->bitRevLength, S->pBitRevTable);
+
+    }
+}
+
+#else + extern void arm_radix4_butterfly_q15( q15_t * pSrc, uint32_t fftLen, @@ -330,3 +945,5 @@ void arm_cfft_radix4by2_inverse_q15( pSrc[4 * i + 3] = p3; } } + +#endif /* defined(ARM_MATH_MVEI) */ \ No newline at end of file diff --git a/Source/TransformFunctions/arm_rfft_init_q15.c
b/Source/TransformFunctions/arm_rfft_init_q15.c index 6f90771a..11e175cf 100644 --- a/Source/TransformFunctions/arm_rfft_init_q15.c +++ b/Source/TransformFunctions/arm_rfft_init_q15.c @@ -92,55 +92,136 @@ arm_status arm_rfft_init_q15( #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096)) case 8192U: S->twidCoefRModifier = 1U; - S->pCfft = &arm_cfft_sR_q15_len4096; + + #if defined(ARM_MATH_MVEI) + status=arm_cfft_init_q15(&(S->cfftInst),4096); + if (status != ARM_MATH_SUCCESS) + { + return(status); + } + #else + S->pCfft = &arm_cfft_sR_q15_len4096; + #endif break; #endif #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048)) case 4096U: S->twidCoefRModifier = 2U; - S->pCfft = &arm_cfft_sR_q15_len2048; + + #if defined(ARM_MATH_MVEI) + status=arm_cfft_init_q15(&(S->cfftInst),2048); + if (status != ARM_MATH_SUCCESS) + { + return(status); + } + #else + S->pCfft = &arm_cfft_sR_q15_len2048; + #endif break; #endif #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024)) case 2048U: S->twidCoefRModifier = 4U; - S->pCfft = &arm_cfft_sR_q15_len1024; + + #if defined(ARM_MATH_MVEI) + status=arm_cfft_init_q15(&(S->cfftInst),1024); + if (status != ARM_MATH_SUCCESS) + { + return(status); + } + #else + S->pCfft = &arm_cfft_sR_q15_len1024; + #endif break; #endif #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512)) case 1024U: S->twidCoefRModifier = 8U; - S->pCfft = &arm_cfft_sR_q15_len512; + + #if defined(ARM_MATH_MVEI) + status=arm_cfft_init_q15(&(S->cfftInst),512); + if (status != ARM_MATH_SUCCESS) + { + return(status); + } + #else + S->pCfft = &arm_cfft_sR_q15_len512; + #endif break; #endif #if 
!defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256)) case 512U: S->twidCoefRModifier = 16U; - S->pCfft = &arm_cfft_sR_q15_len256; + + #if defined(ARM_MATH_MVEI) + status=arm_cfft_init_q15(&(S->cfftInst),256); + if (status != ARM_MATH_SUCCESS) + { + return(status); + } + #else + S->pCfft = &arm_cfft_sR_q15_len256; + #endif break; #endif #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128)) case 256U: S->twidCoefRModifier = 32U; - S->pCfft = &arm_cfft_sR_q15_len128; + + #if defined(ARM_MATH_MVEI) + status=arm_cfft_init_q15(&(S->cfftInst),128); + if (status != ARM_MATH_SUCCESS) + { + return(status); + } + #else + S->pCfft = &arm_cfft_sR_q15_len128; + #endif break; #endif #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64)) case 128U: S->twidCoefRModifier = 64U; - S->pCfft = &arm_cfft_sR_q15_len64; + + #if defined(ARM_MATH_MVEI) + status=arm_cfft_init_q15(&(S->cfftInst),64); + if (status != ARM_MATH_SUCCESS) + { + return(status); + } + #else + S->pCfft = &arm_cfft_sR_q15_len64; + #endif break; #endif #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32)) case 64U: S->twidCoefRModifier = 128U; - S->pCfft = &arm_cfft_sR_q15_len32; + + #if defined(ARM_MATH_MVEI) + status=arm_cfft_init_q15(&(S->cfftInst),32); + if (status != ARM_MATH_SUCCESS) + { + return(status); + } + #else + S->pCfft = &arm_cfft_sR_q15_len32; + #endif break; #endif #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16)) case 32U: S->twidCoefRModifier = 256U; - S->pCfft = &arm_cfft_sR_q15_len16; + + #if defined(ARM_MATH_MVEI) + 
status=arm_cfft_init_q15(&(S->cfftInst),16); + if (status != ARM_MATH_SUCCESS) + { + return(status); + } + #else + S->pCfft = &arm_cfft_sR_q15_len16; + #endif break; #endif default: diff --git a/Source/TransformFunctions/arm_rfft_q15.c b/Source/TransformFunctions/arm_rfft_q15.c index be34ca01..6013e8cd 100644 --- a/Source/TransformFunctions/arm_rfft_q15.c +++ b/Source/TransformFunctions/arm_rfft_q15.c @@ -75,7 +75,11 @@ void arm_rfft_q15( q15_t * pSrc, q15_t * pDst) { +#if defined(ARM_MATH_MVEI) + const arm_cfft_instance_q15 *S_CFFT = &(S->cfftInst); +#else const arm_cfft_instance_q15 *S_CFFT = S->pCfft; +#endif uint32_t L2 = S->fftLenReal >> 1U; uint32_t i; diff --git a/Testing/Include/Tests/TransformCQ15.h b/Testing/Include/Tests/TransformCQ15.h index 6989e758..e1df0bfe 100755 --- a/Testing/Include/Tests/TransformCQ15.h +++ b/Testing/Include/Tests/TransformCQ15.h @@ -15,7 +15,7 @@ class TransformCQ15:public Client::Suite Client::RefPattern ref; - const arm_cfft_instance_q15 *instCfftQ15; + arm_cfft_instance_q15 instCfftQ15; int ifft; @@ -27,5 +27,7 @@ class TransformCQ15:public Client::Suite */ int scaling; + + arm_status status; }; diff --git a/Testing/Source/Tests/TransformCQ15.cpp b/Testing/Source/Tests/TransformCQ15.cpp index 666ba284..3ebb2b35 100755 --- a/Testing/Source/Tests/TransformCQ15.cpp +++ b/Testing/Source/Tests/TransformCQ15.cpp @@ -13,10 +13,10 @@ q15_t *outfftp = outputfft.ptr(); - memcpy(outfftp,inp,sizeof(q15_t)*input.nbSamples()); + memcpy(outfftp,inp,sizeof(q15_t)*input.nbSamples()); - arm_cfft_q15( - this->instCfftQ15, + arm_cfft_q15( + &(this->instCfftQ15), outfftp, this->ifft, 1); @@ -37,7 +37,7 @@ memcpy(outfftp,inp,sizeof(q15_t)*input.nbSamples()); arm_cfft_q15( - this->instCfftQ15, + &(this->instCfftQ15), outfftp, this->ifft, 1); @@ -68,7 +68,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_NOISY_16_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_NOISY_16_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len16; + 
status=arm_cfft_init_q15(&instCfftQ15,16); this->ifft=0; @@ -79,7 +79,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_16_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_16_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len16; + status=arm_cfft_init_q15(&instCfftQ15,16); this->ifft=1; this->scaling = 4; @@ -91,7 +91,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_NOISY_32_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_NOISY_32_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len32; + status=arm_cfft_init_q15(&instCfftQ15,32); this->ifft=0; @@ -102,7 +102,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_32_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_32_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len32; + status=arm_cfft_init_q15(&instCfftQ15,32); this->ifft=1; this->scaling = 5; @@ -114,7 +114,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_NOISY_64_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_NOISY_64_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len64; + status=arm_cfft_init_q15(&instCfftQ15,64); this->ifft=0; @@ -126,7 +126,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_64_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_64_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len64; + status=arm_cfft_init_q15(&instCfftQ15,64); this->ifft=1; this->scaling=6; @@ -138,7 +138,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_NOISY_128_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_NOISY_128_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len128; + status=arm_cfft_init_q15(&instCfftQ15,128); this->ifft=0; @@ -149,7 +149,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_128_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_128_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len128; + status=arm_cfft_init_q15(&instCfftQ15,128); this->ifft=1; this->scaling=7; @@ -161,7 +161,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_NOISY_256_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_NOISY_256_Q15_ID,mgr); - instCfftQ15 = 
&arm_cfft_sR_q15_len256; + status=arm_cfft_init_q15(&instCfftQ15,256); this->ifft=0; @@ -172,7 +172,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_256_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_256_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len256; + status=arm_cfft_init_q15(&instCfftQ15,256); this->ifft=1; this->scaling=8; @@ -184,7 +184,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_NOISY_512_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_NOISY_512_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len512; + status=arm_cfft_init_q15(&instCfftQ15,512); this->ifft=0; @@ -196,7 +196,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_512_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_512_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len512; + status=arm_cfft_init_q15(&instCfftQ15,512); this->ifft=1; this->scaling=9; @@ -209,7 +209,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_NOISY_1024_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_NOISY_1024_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len1024; + status=arm_cfft_init_q15(&instCfftQ15,1024); this->ifft=0; @@ -220,7 +220,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_1024_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_1024_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len1024; + status=arm_cfft_init_q15(&instCfftQ15,1024); this->ifft=1; this->scaling=10; @@ -232,7 +232,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_NOISY_2048_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_NOISY_2048_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len2048; + status=arm_cfft_init_q15(&instCfftQ15,2048); this->ifft=0; @@ -243,7 +243,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_2048_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_2048_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len2048; + status=arm_cfft_init_q15(&instCfftQ15,2048); this->ifft=1; this->scaling=11; @@ -255,7 +255,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_NOISY_4096_Q15_ID,mgr); ref.reload( 
TransformCQ15::REF_CFFT_NOISY_4096_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len4096; + status=arm_cfft_init_q15(&instCfftQ15,4096); this->ifft=0; @@ -266,7 +266,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_4096_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_4096_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len4096; + status=arm_cfft_init_q15(&instCfftQ15,4096); this->ifft=1; this->scaling=12; @@ -280,7 +280,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_STEP_16_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_STEP_16_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len16; + status=arm_cfft_init_q15(&instCfftQ15,16); this->ifft=0; @@ -291,7 +291,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_STEP_16_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_STEP_16_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len16; + status=arm_cfft_init_q15(&instCfftQ15,16); this->ifft=1; this->scaling=4; @@ -303,7 +303,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_STEP_32_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_STEP_32_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len32; + status=arm_cfft_init_q15(&instCfftQ15,32); this->ifft=0; @@ -314,7 +314,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_STEP_32_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_STEP_32_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len32; + status=arm_cfft_init_q15(&instCfftQ15,32); this->ifft=1; this->scaling=5; @@ -326,7 +326,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_STEP_64_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_STEP_64_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len64; + status=arm_cfft_init_q15(&instCfftQ15,64); this->ifft=0; @@ -337,7 +337,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_STEP_64_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_STEP_64_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len64; + status=arm_cfft_init_q15(&instCfftQ15,64); this->ifft=1; this->scaling=6; @@ -349,7 +349,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_STEP_128_Q15_ID,mgr); ref.reload( 
TransformCQ15::REF_CFFT_STEP_128_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len128; + status=arm_cfft_init_q15(&instCfftQ15,128); this->ifft=0; @@ -360,7 +360,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_STEP_128_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_STEP_128_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len128; + status=arm_cfft_init_q15(&instCfftQ15,128); this->ifft=1; this->scaling=7; @@ -372,7 +372,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_STEP_256_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_STEP_256_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len256; + status=arm_cfft_init_q15(&instCfftQ15,256); this->ifft=0; @@ -383,7 +383,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_STEP_256_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_STEP_256_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len256; + status=arm_cfft_init_q15(&instCfftQ15,256); this->ifft=1; this->scaling=8; @@ -395,7 +395,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_STEP_512_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_STEP_512_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len512; + status=arm_cfft_init_q15(&instCfftQ15,512); this->ifft=0; @@ -406,7 +406,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_STEP_512_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_STEP_512_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len512; + status=arm_cfft_init_q15(&instCfftQ15,512); this->ifft=1; this->scaling=9; @@ -418,7 +418,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_STEP_1024_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_STEP_1024_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len1024; + status=arm_cfft_init_q15(&instCfftQ15,1024); this->ifft=0; @@ -429,7 +429,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_STEP_1024_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_STEP_1024_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len1024; + status=arm_cfft_init_q15(&instCfftQ15,1024); this->ifft=1; this->scaling=10; @@ -441,7 +441,7 @@ 
input.reload(TransformCQ15::INPUTS_CFFT_STEP_2048_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_STEP_2048_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len2048; + status=arm_cfft_init_q15(&instCfftQ15,2048); this->ifft=0; @@ -452,7 +452,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_STEP_2048_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_STEP_2048_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len2048; + status=arm_cfft_init_q15(&instCfftQ15,2048); this->ifft=1; this->scaling=11; @@ -464,7 +464,7 @@ input.reload(TransformCQ15::INPUTS_CFFT_STEP_4096_Q15_ID,mgr); ref.reload( TransformCQ15::REF_CFFT_STEP_4096_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len4096; + status=arm_cfft_init_q15(&instCfftQ15,4096); this->ifft=0; @@ -475,7 +475,7 @@ input.reload(TransformCQ15::INPUTS_CIFFT_STEP_4096_Q15_ID,mgr); ref.reload( TransformCQ15::INPUTS_CFFT_STEP_4096_Q15_ID,mgr); - instCfftQ15 = &arm_cfft_sR_q15_len4096; + status=arm_cfft_init_q15(&instCfftQ15,4096); this->ifft=1; this->scaling=12;