CMSIS-DSP: Added MVE code for arm_cfft_q15

pull/19/head
Christophe Favergeon 6 years ago
parent fa38aed181
commit b6d9bdd4ef

@ -3113,8 +3113,20 @@ void arm_mat_init_f32(
const q15_t *pTwiddle; /**< points to the Twiddle factor table. */
const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
uint16_t bitRevLength; /**< bit reversal table length. */
#if defined(ARM_MATH_MVEI)
const uint32_t *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */ \
const uint32_t *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */ \
const uint32_t *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */ \
const q15_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */ \
const q15_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */ \
const q15_t *rearranged_twiddle_stride3;
#endif
} arm_cfft_instance_q15;
/**
 * @brief Initialization function for the Q15 CFFT instance.
 * @param[in,out] S points to the Q15 CFFT instance to initialise
 * @param[in] fftLen FFT length (number of complex samples)
 * @return execution status: ARM_MATH_SUCCESS, or ARM_MATH_ARGUMENT_ERROR when fftLen is not handled
 */
arm_status arm_cfft_init_q15(
arm_cfft_instance_q15 * S,
uint16_t fftLen);
void arm_cfft_q15(
const arm_cfft_instance_q15 * S,
q15_t * p1,
@ -3209,7 +3221,11 @@ void arm_cfft_q31(
uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
const q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */
const q15_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */
#if defined(ARM_MATH_MVEI)
arm_cfft_instance_q15 cfftInst;
#else
const arm_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */
#endif
} arm_rfft_instance_q15;
arm_status arm_rfft_init_q15(

@ -115,76 +115,3 @@ arm_status arm_cfft_init_f64(
}
/**
  @brief         Initialization function for the Q15 complex FFT instance.
  @param[in,out] S       points to the Q15 CFFT instance to fill in
  @param[in]     fftLen  FFT length (number of complex samples)
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : fftLen is not one of the lengths handled below
 */
arm_status arm_cfft_init_q15(
  arm_cfft_instance_q15 * S,
  uint16_t fftLen)
{
    /* Initialise the default arm status */
    arm_status status = ARM_MATH_SUCCESS;

    /* Initialise the FFT length */
    S->fftLen = fftLen;

    /*
     * pTwiddle is a pointer to q15_t coefficients, so it must not be pointed
     * at a float32_t table. Start from NULL; it stays NULL when fftLen is
     * rejected by the default case.
     */
    S->pTwiddle = NULL;

    /*
     * Per-length initialisation of the instance.
     * NOTE(review): FFTFXTINIT is defined outside this view; presumably it
     * copies the table fields from arm_cfft_sR_q15_len<SIZE> - confirm.
     */
    switch (S->fftLen) {
    /* 4096 point FFT */
    case 4096U:
        FFTFXTINIT(q15,4096);
        break;

    /* 2048 point FFT */
    case 2048U:
        FFTFXTINIT(q15,2048);
        break;

    /* 1024 point FFT */
    case 1024U:
        FFTFXTINIT(q15,1024);
        break;

    /* 512 point FFT */
    case 512U:
        FFTFXTINIT(q15,512);
        break;

    /* 256 point FFT */
    case 256U:
        FFTFXTINIT(q15,256);
        break;

    /* 128 point FFT */
    case 128U:
        FFTFXTINIT(q15,128);
        break;

    /* 64 point FFT */
    case 64U:
        FFTFXTINIT(q15,64);
        break;

    /* 32 point FFT */
    case 32U:
        FFTFXTINIT(q15,32);
        break;

    /* 16 point FFT */
    case 16U:
        FFTFXTINIT(q15,16);
        break;

    default:
        /* Reporting argument error if fftSize is not valid value */
        status = ARM_MATH_ARGUMENT_ERROR;
        break;
    }

    return (status);
}

@ -229,7 +229,6 @@ const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096 = {
};
#endif
#endif /* !defined(ARM_MATH_MVEI) */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
const arm_cfft_instance_q15 arm_cfft_sR_q15_len16 = {
@ -285,6 +284,8 @@ const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096 = {
};
#endif
#endif /* !defined(ARM_MATH_MVEI) */
/* Structure for real-value inputs */
/* Double precision strucs */
@ -545,7 +546,6 @@ const arm_rfft_instance_q31 arm_rfft_sR_q31_len8192 = {
};
#endif
#endif /* !defined(ARM_MATH_MVEI) */
/* q15_t */
@ -657,4 +657,7 @@ const arm_rfft_instance_q15 arm_rfft_sR_q15_len8192 = {
};
#endif
#endif /* !defined(ARM_MATH_MVEI) */
#endif

@ -38,6 +38,7 @@ if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q15_16 OR CFFT_Q15_32 OR CFFT_Q15_64 OR CF
target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q15.c)
endif()
if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q31_16 OR CFFT_Q31_32 OR CFFT_Q31_64 OR CFFT_Q31_128 OR CFFT_Q31_256 OR CFFT_Q31_512
@ -82,6 +83,7 @@ target_sources(CMSISDSPTransform PRIVATE arm_dct4_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c)
endif()
@ -117,6 +119,7 @@ if (NOT CONFIGTABLE OR ALLFFT OR RFFT_Q15_32 OR RFFT_Q15_64 OR RFFT_Q15_128 OR R
target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q15.c)
target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c)
endif()

@ -59,3 +59,5 @@
#include "arm_rfft_q15.c"
#include "arm_rfft_q31.c"
#include "arm_cfft_init_f32.c"
#include "arm_cfft_init_q31.c"
#include "arm_cfft_init_q15.c"

@ -0,0 +1,356 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_init_q15.c
* Description: Initialization function for cfft q15 instance
*
* $Date: 07. January 2020
* $Revision: V1.7.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * Copy the bit-reversal table, its length and the twiddle table from the
 * constant instance arm_cfft_sR_<EXT>_len<SIZE> into the instance pointed to
 * by the local variable `S` of the function using the macro.
 * The body is wrapped in do { } while (0) so that the expansion is exactly one
 * statement and stays correct under an unbraced if / else.
 */
#define FFTINIT(EXT,SIZE) \
    do { \
        S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength; \
        S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable; \
        S->pTwiddle = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle; \
    } while (0)
/**
@addtogroup ComplexFFT
@{
*/
/**
@brief Initialization function for the cfft q15 function
@param[in,out] S points to an instance of the Q15 CFFT structure
@param[in] fftLen fft length (number of complex samples)
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
@par Use of this function is mandatory only for the MVE version of the FFT.
Other versions can still initialize directly the data structure using
variables declared in arm_const_structs.h
*/
#include "arm_math.h"
#include "arm_common_tables.h"
#include "arm_const_structs.h"
#if defined(ARM_MATH_MVEI)
#include "arm_vec_fft.h"
#include "arm_mve_tables.h"
/*
 * Attach the rearranged twiddle tables used by the MVE radix-4 stages.
 *
 * The table set is chosen from S->fftLen >> (twidCoefModifier - 1); the
 * callers in this file pass 1 or 2 as twidCoefModifier.
 *
 * Returns ARM_MATH_SUCCESS when a table set was attached and
 * ARM_MATH_ARGUMENT_ERROR when no case matches (unsupported length, or the
 * matching tables were configured out).
 */
arm_status arm_cfft_radix4by2_rearrange_twiddles_q15(arm_cfft_instance_q15 *S, int twidCoefModifier)
{
    arm_status result = ARM_MATH_SUCCESS;

    switch (S->fftLen >> (twidCoefModifier - 1)) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
    case 4096U:
        /* per-stage start offsets */
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_4096_q15;
        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_4096_q15;
        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_4096_q15;
        /* twiddle storage */
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_4096_q15;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_4096_q15;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_4096_q15;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024) || defined(ARM_TABLE_BITREVIDX_FXT_2048)
    case 1024U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_1024_q15;
        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_1024_q15;
        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_1024_q15;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_1024_q15;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_1024_q15;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_1024_q15;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256) || defined(ARM_TABLE_BITREVIDX_FXT_512)
    case 256U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_256_q15;
        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_256_q15;
        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_256_q15;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_256_q15;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_256_q15;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_256_q15;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64) || defined(ARM_TABLE_BITREVIDX_FXT_128)
    case 64U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_64_q15;
        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_64_q15;
        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_64_q15;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_64_q15;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_64_q15;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_64_q15;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16) || defined(ARM_TABLE_BITREVIDX_FXT_32)
    case 16U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_16_q15;
        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_16_q15;
        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_16_q15;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_16_q15;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_16_q15;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_16_q15;
        break;
#endif

    default:
        /* no table set for this length: the instance fields are left untouched */
        result = ARM_MATH_ARGUMENT_ERROR;
        break;
    }

    return (result);
}
/*
 * MVE flavour of the Q15 CFFT initialisation.
 *
 * Stores fftLen in the instance, then for a supported length attaches the
 * twiddle table, the bit-reversal table and its length, and finally the
 * rearranged twiddle tables of the radix-4 stages. The second argument of
 * arm_cfft_radix4by2_rearrange_twiddles_q15() is 1 for the lengths 16, 64,
 * 256, 1024, 4096 and 2 for the lengths 32, 128, 512, 2048.
 *
 * Returns the status of the rearranged-twiddle lookup, or
 * ARM_MATH_ARGUMENT_ERROR when fftLen matches no case; pTwiddle is then NULL.
 */
arm_status arm_cfft_init_q15(
  arm_cfft_instance_q15 * S,
  uint16_t fftLen)
{
    /* Pessimistic default: only a matching case below replaces it */
    arm_status status = ARM_MATH_ARGUMENT_ERROR;

    S->fftLen = fftLen;
    S->pTwiddle = NULL;

    switch (S->fftLen) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
    case 4096U:
        S->pTwiddle = (q15_t *)twiddleCoef_4096_q15;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_4096;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH;
        status = arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_2048)
    case 2048U:
        S->pTwiddle = (q15_t *)twiddleCoef_2048_q15;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_2048;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH;
        status = arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024)
    case 1024U:
        S->pTwiddle = (q15_t *)twiddleCoef_1024_q15;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_1024;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH;
        status = arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_512)
    case 512U:
        S->pTwiddle = (q15_t *)twiddleCoef_512_q15;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_512;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH;
        status = arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256)
    case 256U:
        S->pTwiddle = (q15_t *)twiddleCoef_256_q15;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_256;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH;
        status = arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_128)
    case 128U:
        S->pTwiddle = (q15_t *)twiddleCoef_128_q15;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_128;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH;
        status = arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64)
    case 64U:
        S->pTwiddle = (q15_t *)twiddleCoef_64_q15;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_64;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH;
        status = arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_32)
    case 32U:
        S->pTwiddle = (q15_t *)twiddleCoef_32_q15;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_32;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH;
        status = arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16)
    case 16U:
        S->pTwiddle = (q15_t *)twiddleCoef_16_q15;
        S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_16;
        S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH;
        status = arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1);
        break;
#endif

    default:
        /* fftLen is not a valid value: the argument error is reported */
        break;
    }

    return (status);
}
#else
/*
 * Non-MVE flavour of the Q15 CFFT initialisation.
 *
 * Stores fftLen in the instance and copies the bit-reversal table, its length
 * and the twiddle table from the matching constant instance
 * arm_cfft_sR_q15_len<N> (see FFTINIT).
 *
 * Each case is compiled only when the Q15 twiddle table and the fixed-point
 * bit-reversal table of that length are available. These are the same
 * conditions that guard the arm_cfft_sR_q15_len<N> definitions, so a case
 * never refers to a constant instance that was configured out.
 *
 * Returns ARM_MATH_SUCCESS, or ARM_MATH_ARGUMENT_ERROR when fftLen matches no
 * compiled case; pTwiddle is then left NULL.
 */
arm_status arm_cfft_init_q15(
  arm_cfft_instance_q15 * S,
  uint16_t fftLen)
{
    /* Initialise the default arm status */
    arm_status status = ARM_MATH_SUCCESS;

    /* Initialise the FFT length */
    S->fftLen = fftLen;

    /* Initialise the Twiddle coefficient pointer */
    S->pTwiddle = NULL;

    /* Initializations of Instance structure depending on the FFT length */
    switch (S->fftLen) {
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
    /* Initializations of structure parameters for 4096 point FFT */
    case 4096U:
        FFTINIT(q15,4096);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
    /* Initializations of structure parameters for 2048 point FFT */
    case 2048U:
        FFTINIT(q15,2048);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
    /* Initializations of structure parameters for 1024 point FFT */
    case 1024U:
        FFTINIT(q15,1024);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
    /* Initializations of structure parameters for 512 point FFT */
    case 512U:
        FFTINIT(q15,512);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
    /* Initializations of structure parameters for 256 point FFT */
    case 256U:
        FFTINIT(q15,256);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
    /* Initializations of structure parameters for 128 point FFT */
    case 128U:
        FFTINIT(q15,128);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
    /* Initializations of structure parameters for 64 point FFT */
    case 64U:
        FFTINIT(q15,64);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
    /* Initializations of structure parameters for 32 point FFT */
    case 32U:
        FFTINIT(q15,32);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
    /* Initializations of structure parameters for 16 point FFT */
    case 16U:
        FFTINIT(q15,16);
        break;
#endif

    default:
        /* Reporting argument error if fftSize is not valid value */
        status = ARM_MATH_ARGUMENT_ERROR;
        break;
    }

    return (status);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of ComplexFFT group
*/

@ -28,6 +28,621 @@
#include "arm_math.h"
#if defined(ARM_MATH_MVEI)
#include "arm_vec_fft.h"
/*
 * In-place bit-reversal reordering, vectorized with MVE.
 *
 * pSrc       : buffer to reorder; it is accessed as an array of 32-bit words
 * bitRevLen  : number of 16-bit entries in pBitRevTab
 * pBitRevTab : entries are consumed as consecutive (first, second) pairs and
 *              the two words designated by each pair are exchanged
 *
 * Each entry is shifted right by 3 and then used as a 32-bit word index into
 * the buffer.
 */
static void arm_bitreversal_16_inpl_mve(
        uint16_t *pSrc,
  const uint16_t bitRevLen,
  const uint16_t *pBitRevTab)
{
    uint32_t *src = (uint32_t *)pSrc;
    uint32_t blkCnt; /* loop counter */
    /*
     * Holds eight 16-bit table entries: it receives the result of
     * vldrhq_u16() and feeds vmull{b,t}q_int_u16(), so it has to be a
     * uint16x8_t.
     */
    uint16x8_t bitRevTabOff;
    uint16x8_t one = vdupq_n_u16(1);

    /* 8 table entries, i.e. 4 exchanges, per iteration */
    blkCnt = (bitRevLen / 2) / 4;
    while (blkCnt > 0U) {
        bitRevTabOff = vldrhq_u16(pBitRevTab);
        pBitRevTab += 8;

        /*
         * Widen to 32 bits by multiplying with 1: the "b" form takes the
         * even-numbered lanes (first entry of each pair), the "t" form the
         * odd-numbered ones (second entry of each pair).
         */
        uint32x4_t bitRevOff1 = vmullbq_int_u16(bitRevTabOff, one);
        uint32x4_t bitRevOff2 = vmulltq_int_u16(bitRevTabOff, one);

        bitRevOff1 = bitRevOff1 >> 3;
        bitRevOff2 = bitRevOff2 >> 3;

        /* read both sides first, then write them back crossed over */
        uint32x4_t in1 = vldrwq_gather_shifted_offset_u32(src, bitRevOff1);
        uint32x4_t in2 = vldrwq_gather_shifted_offset_u32(src, bitRevOff2);

        vstrwq_scatter_shifted_offset_u32(src, bitRevOff1, in2);
        vstrwq_scatter_shifted_offset_u32(src, bitRevOff2, in1);

        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail
     * (will be merged thru tail predication)
     */
    blkCnt = bitRevLen & 7;
    if (blkCnt > 0U) {
        /* enables the first blkCnt 16-bit lanes */
        mve_pred16_t p0 = vctp16q(blkCnt);

        bitRevTabOff = vldrhq_z_u16(pBitRevTab, p0);

        uint32x4_t bitRevOff1 = vmullbq_int_u16(bitRevTabOff, one);
        uint32x4_t bitRevOff2 = vmulltq_int_u16(bitRevTabOff, one);

        bitRevOff1 = bitRevOff1 >> 3;
        bitRevOff2 = bitRevOff2 >> 3;

        uint32x4_t in1 = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff1, p0);
        uint32x4_t in2 = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff2, p0);

        vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff1, in2, p0);
        vstrwq_scatter_shifted_offset_p_u32(src, bitRevOff2, in1, p0);
    }
}
/*
 * Forward radix-4 butterflies on fftLen complex Q15 samples, in place (MVE).
 *
 * S      : instance providing the per-stage rearranged twiddle tables
 * pSrc   : complex samples, overwritten with the result
 * fftLen : number of complex samples processed by this call
 *
 * Every stage but the last multiplies three of the four butterfly outputs by
 * twiddles; the last stage uses gather / scatter accesses and no twiddle.
 * Sums and differences are formed with the halving intrinsics vhaddq / vhsubq.
 */
static void _arm_radix4_butterfly_q15_mve(
const arm_cfft_instance_q15 * S,
q15_t *pSrc,
uint32_t fftLen)
{
q15x8_t vecTmp0, vecTmp1;
q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1;
q15x8_t vecA, vecB, vecC, vecD;
q15x8_t vecW;
uint32_t blkCnt;
uint32_t n1, n2;
uint32_t stage = 0;
int32_t iter = 1;
/*
 * Offsets of samples 0, 4, 8 and 12, expressed relative to a point 16 samples
 * further on; they seed the gather / scatter base vector of the last stage.
 * NOTE(review): the element size is written as sizeof(q15_t *), so the offsets
 * scale with the pointer size rather than with the size of a complex Q15
 * sample - confirm this is only built where pointers are 4 bytes wide.
 */
static const uint32_t strides[4] = {
(0 - 16) * sizeof(q15_t *), (4 - 16) * sizeof(q15_t *),
(8 - 16) * sizeof(q15_t *), (12 - 16) * sizeof(q15_t *)
};
/*
* Process first stages
* Each stage in middle stages provides two down scaling of the input
*/
/* n1: samples per group in the current stage, n2: a quarter of that */
n2 = fftLen;
n1 = n2;
n2 >>= 2u;
/* k only counts the stages; the body works from n1, n2, iter and stage */
for (int k = fftLen / 4u; k > 1; k >>= 2u)
{
/* one pass per group of n1 samples */
for (int i = 0; i < iter; i++)
{
/* start of this stage's twiddles inside each rearranged table */
q15_t const *p_rearranged_twiddle_tab_stride2 =
&S->rearranged_twiddle_stride2[
S->rearranged_twiddle_tab_stride2_arr[stage]];
q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
S->rearranged_twiddle_tab_stride3_arr[stage]];
q15_t const *p_rearranged_twiddle_tab_stride1 =
&S->rearranged_twiddle_stride1[
S->rearranged_twiddle_tab_stride1_arr[stage]];
q15_t const *pW1, *pW2, *pW3;
/* the group is split into four quarters A, B, C, D of n2 samples each */
q15_t *inA = pSrc + CMPLX_DIM * i * n1;
q15_t *inB = inA + n2 * CMPLX_DIM;
q15_t *inC = inB + n2 * CMPLX_DIM;
q15_t *inD = inC + n2 * CMPLX_DIM;
pW1 = p_rearranged_twiddle_tab_stride1;
pW2 = p_rearranged_twiddle_tab_stride2;
pW3 = p_rearranged_twiddle_tab_stride3;
/* 4 complex samples (8 q15 values) per vector */
blkCnt = n2 / 4;
/*
* load 4 x q15 complex pair
*/
vecA = vldrhq_s16(inA);
vecC = vldrhq_s16(inC);
while (blkCnt > 0U)
{
vecB = vldrhq_s16(inB);
vecD = vldrhq_s16(inD);
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* [ 1 1 1 1 ] * [ A B C D ]' .* 1
*/
vecTmp0 = vhaddq(vecSum0, vecSum1);
vst1q(inA, vecTmp0);
inA += 8;
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'
*/
vecTmp0 = vhsubq(vecSum0, vecSum1);
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'.* W2
*/
vecW = vld1q(pW2);
pW2 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
vst1q(inB, vecTmp1);
inB += 8;
/*
* [ 1 -i -1 +i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 -i -1 +i ] * [ A B C D ]'.* W1
*/
vecW = vld1q(pW1);
pW1 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
vst1q(inC, vecTmp1);
inC += 8;
/*
* [ 1 +i -1 -i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 +i -1 -i ] * [ A B C D ]'.* W3
*/
vecW = vld1q(pW3);
pW3 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
vst1q(inD, vecTmp1);
inD += 8;
/* pre-load A and C for the next iteration */
vecA = vldrhq_s16(inA);
vecC = vldrhq_s16(inC);
blkCnt--;
}
}
/* next stage: four times as many groups, each a quarter of the size */
n1 = n2;
n2 >>= 2u;
iter = iter << 2;
stage++;
}
/*
* start of Last stage process
*/
/* base vector = pSrc plus the (negative) offsets from strides[] */
uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*
* load scheduling
*/
/*
 * The write-back gather loads from base + 64 and leaves the base advanced by
 * 64 bytes. The offsets 0 / 4 / 8 / 12 used with that base select four
 * adjacent 32-bit words, taken here as the A, B, C and D inputs of one
 * butterfly; each word is reinterpreted as two q15 values.
 */
vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
/* 16 complex samples (4 butterflies) per iteration */
blkCnt = (fftLen >> 4);
while (blkCnt > 0U)
{
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecB = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 4);
vecD = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 12);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* pre-load for next iteration
*/
/*
 * NOTE(review): this pre-load also runs in the final iteration, where it reads
 * the 64 bytes that follow the last block handled by this call - confirm that
 * this memory is always readable.
 */
vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
/* the base has already moved on, hence the -64 to store into the current block */
vecTmp0 = vhaddq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64, (q15x8_t) vecTmp0);
vecTmp0 = vhsubq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (q15x8_t) vecTmp0);
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (q15x8_t) vecTmp0);
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (q15x8_t) vecTmp0);
blkCnt--;
}
}
/*
 * Forward CFFT built from one radix-2 stage followed by two radix-4
 * transforms of half the length (MVE).
 *
 * S      : instance providing the twiddle tables
 * pSrc   : 2 * fftLen q15 values (complex samples), processed in place
 * fftLen : number of complex samples
 *
 * Inputs are shifted right by one before the radix-2 stage and the whole
 * buffer is shifted left by one once both halves have been transformed.
 */
static void arm_cfft_radix4by2_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pSrc, uint32_t fftLen)
{
    const uint32_t halfLen = fftLen >> 1;
    q15_t *pLower = pSrc;               /* first half of the samples  */
    q15_t *pUpper = pSrc + fftLen;      /* second half of the samples */
    q15_t const *pTw = S->pTwiddle;
    q15_t *pScale;
    uint32_t remaining;
    q15x8_t lower, upper, sum, diff, twiddle, rotated;

    /* radix-2 stage: 4 complex samples of each half per iteration */
    for (remaining = halfLen / 4; remaining > 0U; remaining--)
    {
        lower = *(q15x8_t *) pLower;
        upper = *(q15x8_t *) pUpper;

        lower = lower >> 1;
        upper = upper >> 1;

        sum = vhaddq(lower, upper);
        vst1q(pLower, sum);
        pLower += 8;

        twiddle = vld1q(pTw);
        pTw += 8;

        /* the difference is multiplied by the conjugated twiddle */
        diff = vhsubq(lower, upper);
        rotated = MVE_CMPLX_MULT_FX_AxConjB(diff, twiddle);
        vst1q(pUpper, rotated);
        pUpper += 8;
    }

    /* radix-4 transform of each half */
    _arm_radix4_butterfly_q15_mve(S, pSrc, halfLen);
    _arm_radix4_butterfly_q15_mve(S, pSrc + fftLen, halfLen);

    /* final scaling: 8 q15 values per iteration */
    pScale = pSrc;
    for (remaining = (fftLen << 1) >> 3; remaining > 0U; remaining--)
    {
        lower = *(q15x8_t *) pScale;
        lower = lower << 1;
        vst1q(pScale, lower);
        pScale += 8;
    }

    /*
     * tail
     * (will be merged thru tail predication)
     */
    remaining = (fftLen << 1) & 7;
    if (remaining > 0U)
    {
        mve_pred16_t p0 = vctp16q(remaining);

        lower = *(q15x8_t *) pScale;
        lower = lower << 1;
        vstrhq_p(pScale, lower, p0);
    }
}
/*
 * Inverse radix-4 butterflies on fftLen complex Q15 samples, in place (MVE).
 *
 * S      : instance providing the per-stage rearranged twiddle tables
 * pSrc   : complex samples, overwritten with the result
 * fftLen : number of complex samples processed by this call
 *
 * Same structure as the forward butterfly in this file, except that the
 * twiddles are applied conjugated (MVE_CMPLX_MULT_FX_AxConjB) and the
 * A + iB / A - iB combinations are exchanged.
 */
static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S,q15_t *pSrc, uint32_t fftLen)
{
q15x8_t vecTmp0, vecTmp1;
q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1;
q15x8_t vecA, vecB, vecC, vecD;
q15x8_t vecW;
uint32_t blkCnt;
uint32_t n1, n2;
uint32_t stage = 0;
int32_t iter = 1;
/*
 * Offsets of samples 0, 4, 8 and 12, expressed relative to a point 16 samples
 * further on; they seed the gather / scatter base vector of the last stage.
 * NOTE(review): the element size is written as sizeof(q15_t *), so the offsets
 * scale with the pointer size rather than with the size of a complex Q15
 * sample - confirm this is only built where pointers are 4 bytes wide.
 */
static const uint32_t strides[4] = {
(0 - 16) * sizeof(q15_t *), (4 - 16) * sizeof(q15_t *),
(8 - 16) * sizeof(q15_t *), (12 - 16) * sizeof(q15_t *)
};
/*
* Process first stages
* Each stage in middle stages provides two down scaling of the input
*/
/* n1: samples per group in the current stage, n2: a quarter of that */
n2 = fftLen;
n1 = n2;
n2 >>= 2u;
/* k only counts the stages; the body works from n1, n2, iter and stage */
for (int k = fftLen / 4u; k > 1; k >>= 2u)
{
/* one pass per group of n1 samples */
for (int i = 0; i < iter; i++)
{
/* start of this stage's twiddles inside each rearranged table */
q15_t const *p_rearranged_twiddle_tab_stride2 =
&S->rearranged_twiddle_stride2[
S->rearranged_twiddle_tab_stride2_arr[stage]];
q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
S->rearranged_twiddle_tab_stride3_arr[stage]];
q15_t const *p_rearranged_twiddle_tab_stride1 =
&S->rearranged_twiddle_stride1[
S->rearranged_twiddle_tab_stride1_arr[stage]];
q15_t const *pW1, *pW2, *pW3;
/* the group is split into four quarters A, B, C, D of n2 samples each */
q15_t *inA = pSrc + CMPLX_DIM * i * n1;
q15_t *inB = inA + n2 * CMPLX_DIM;
q15_t *inC = inB + n2 * CMPLX_DIM;
q15_t *inD = inC + n2 * CMPLX_DIM;
pW1 = p_rearranged_twiddle_tab_stride1;
pW2 = p_rearranged_twiddle_tab_stride2;
pW3 = p_rearranged_twiddle_tab_stride3;
/* 4 complex samples (8 q15 values) per vector */
blkCnt = n2 / 4;
/*
* load 4 x q15 complex pair
*/
vecA = vldrhq_s16(inA);
vecC = vldrhq_s16(inC);
while (blkCnt > 0U)
{
vecB = vldrhq_s16(inB);
vecD = vldrhq_s16(inD);
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* [ 1 1 1 1 ] * [ A B C D ]' .* 1
*/
vecTmp0 = vhaddq(vecSum0, vecSum1);
vst1q(inA, vecTmp0);
inA += 8;
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'
*/
vecTmp0 = vhsubq(vecSum0, vecSum1);
/*
* [ 1 -1 1 -1 ] * [ A B C D ]'.* conj(W2)
*/
vecW = vld1q(pW2);
pW2 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
vst1q(inB, vecTmp1);
inB += 8;
/*
* [ 1 +i -1 -i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 +i -1 -i ] * [ A B C D ]'.* conj(W1)
*/
vecW = vld1q(pW1);
pW1 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
vst1q(inC, vecTmp1);
inC += 8;
/*
* [ 1 -i -1 +i ] * [ A B C D ]'
*/
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
/*
* [ 1 -i -1 +i ] * [ A B C D ]'.* conj(W3)
*/
vecW = vld1q(pW3);
pW3 += 8;
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
vst1q(inD, vecTmp1);
inD += 8;
/* pre-load A and C for the next iteration */
vecA = vldrhq_s16(inA);
vecC = vldrhq_s16(inC);
blkCnt--;
}
}
/* next stage: four times as many groups, each a quarter of the size */
n1 = n2;
n2 >>= 2u;
iter = iter << 2;
stage++;
}
/*
* start of Last stage process
*/
/* base vector = pSrc plus the (negative) offsets from strides[] */
uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*
* load scheduling
*/
/*
 * The write-back gather loads from base + 64 and leaves the base advanced by
 * 64 bytes. The offsets 0 / 4 / 8 / 12 used with that base select four
 * adjacent 32-bit words, taken here as the A, B, C and D inputs of one
 * butterfly; each word is reinterpreted as two q15 values.
 */
vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
/* 16 complex samples (4 butterflies) per iteration */
blkCnt = (fftLen >> 4);
while (blkCnt > 0U)
{
vecSum0 = vhaddq(vecA, vecC);
vecDiff0 = vhsubq(vecA, vecC);
vecB = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 4);
vecD = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 12);
vecSum1 = vhaddq(vecB, vecD);
vecDiff1 = vhsubq(vecB, vecD);
/*
* pre-load for next iteration
*/
/*
 * NOTE(review): this pre-load also runs in the final iteration, where it reads
 * the 64 bytes that follow the last block handled by this call - confirm that
 * this memory is always readable.
 */
vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
/* the base has already moved on, hence the -64 to store into the current block */
vecTmp0 = vhaddq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64, (q15x8_t) vecTmp0);
vecTmp0 = vhsubq(vecSum0, vecSum1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (q15x8_t) vecTmp0);
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (q15x8_t) vecTmp0);
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (q15x8_t) vecTmp0);
blkCnt--;
}
}
/*
 * Inverse CFFT built from one radix-2 stage followed by two inverse radix-4
 * transforms of half the length (MVE).
 *
 * S      : instance providing the twiddle tables
 * pSrc   : 2 * fftLen q15 values (complex samples), processed in place
 * fftLen : number of complex samples
 *
 * Inputs are shifted right by one before the radix-2 stage and the whole
 * buffer is shifted left by one once both halves have been transformed.
 */
static void arm_cfft_radix4by2_inverse_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pSrc, uint32_t fftLen)
{
    uint32_t n2;
    q15_t *pIn0;
    q15_t *pIn1;
    const q15_t *pCoef = S->pTwiddle;
    uint32_t blkCnt;
    q15x8_t vecIn0, vecIn1, vecSum, vecDiff;
    q15x8_t vecCmplxTmp, vecTw;
    q15_t const *pCoefVec;

    n2 = fftLen >> 1;
    pIn0 = pSrc;            /* first half of the samples  */
    pIn1 = pSrc + fftLen;   /* second half of the samples */
    pCoefVec = pCoef;

    /* radix-2 stage: 4 complex samples of each half per iteration */
    blkCnt = n2 / 4;
    while (blkCnt > 0U)
    {
        vecIn0 = *(q15x8_t *) pIn0;
        vecIn1 = *(q15x8_t *) pIn1;

        vecIn0 = vecIn0 >> 1;
        vecIn1 = vecIn1 >> 1;

        vecSum = vhaddq(vecIn0, vecIn1);
        vst1q(pIn0, vecSum);
        pIn0 += 8;

        vecTw = vld1q(pCoefVec);
        pCoefVec += 8;

        /* the difference is multiplied by the twiddle (not conjugated here) */
        vecDiff = vhsubq(vecIn0, vecIn1);
        vecCmplxTmp = vqrdmlsdhq(vuninitializedq_s16() , vecDiff, vecTw);
        vecCmplxTmp = vqrdmladhxq(vecCmplxTmp, vecDiff, vecTw);
        vst1q(pIn1, vecCmplxTmp);
        pIn1 += 8;

        blkCnt--;
    }

    /* inverse radix-4 transform of each half */
    _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, n2);
    _arm_radix4_butterfly_inverse_q15_mve(S, pSrc + fftLen, n2);

    /* final scaling: 8 q15 values per iteration */
    pIn0 = pSrc;
    blkCnt = (fftLen << 1) >> 3;
    while (blkCnt > 0U)
    {
        vecIn0 = *(q15x8_t *) pIn0;
        vecIn0 = vecIn0 << 1;
        vst1q(pIn0, vecIn0);
        pIn0 += 8;
        blkCnt--;
    }

    /*
     * tail
     * (will be merged thru tail predication)
     *
     * A single predicated pass handles the leftover values. This must be an
     * `if`: nothing in the body changes blkCnt, so a loop here would never
     * terminate once entered.
     */
    blkCnt = (fftLen << 1) & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        vecIn0 = *(q15x8_t *) pIn0;
        vecIn0 = vecIn0 << 1;
        vstrhq_p(pIn0, vecIn0, p0);
    }
}
/**
@ingroup groupTransforms
*/
/**
@addtogroup ComplexFFT
@{
*/
/**
  @brief         Processing function for Q15 complex FFT.
  @param[in]     S               points to an instance of Q15 CFFT structure
  @param[in,out] pSrc            points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  @param[in]     ifftFlag        flag that selects transform direction
                   - value = 0: forward transform
                   - value = 1: inverse transform
  @param[in]     bitReverseFlag  flag that enables / disables bit reversal of output
                   - value = 0: disables bit reversal of output
                   - value = 1: enables bit reversal of output
  @return        none
 */
void arm_cfft_q15(
  const arm_cfft_instance_q15 * S,
        q15_t * pSrc,
        uint8_t ifftFlag,
        uint8_t bitReverseFlag)
{
    const uint32_t fftLen = S->fftLen;
    /* only the value 1 selects the inverse transform */
    const int inverse = (ifftFlag == 1U);

    switch (fftLen) {
    /* powers of 4: radix-4 stages only */
    case 16:
    case 64:
    case 256:
    case 1024:
    case 4096:
        if (inverse) {
            _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, fftLen);
        } else {
            _arm_radix4_butterfly_q15_mve(S, pSrc, fftLen);
        }
        break;

    /* 2 x a power of 4: one radix-2 stage, then radix-4 on each half */
    case 32:
    case 128:
    case 512:
    case 2048:
        if (inverse) {
            arm_cfft_radix4by2_inverse_q15_mve(S, pSrc, fftLen);
        } else {
            arm_cfft_radix4by2_q15_mve(S, pSrc, fftLen);
        }
        break;

    default:
        /* any other length: no transform is applied */
        break;
    }

    if (bitReverseFlag)
    {
        arm_bitreversal_16_inpl_mve((uint16_t*)pSrc, S->bitRevLength, S->pBitRevTable);
    }
}
#else
extern void arm_radix4_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
@ -330,3 +945,5 @@ void arm_cfft_radix4by2_inverse_q15(
pSrc[4 * i + 3] = p3;
}
}
#endif /* defined(ARM_MATH_MVEI) */

@ -92,55 +92,136 @@ arm_status arm_rfft_init_q15(
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
case 8192U:
S->twidCoefRModifier = 1U;
S->pCfft = &arm_cfft_sR_q15_len4096;
#if defined(ARM_MATH_MVEI)
status=arm_cfft_init_q15(&(S->cfftInst),4096);
if (status != ARM_MATH_SUCCESS)
{
return(status);
}
#else
S->pCfft = &arm_cfft_sR_q15_len4096;
#endif
break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
case 4096U:
S->twidCoefRModifier = 2U;
S->pCfft = &arm_cfft_sR_q15_len2048;
#if defined(ARM_MATH_MVEI)
status=arm_cfft_init_q15(&(S->cfftInst),2048);
if (status != ARM_MATH_SUCCESS)
{
return(status);
}
#else
S->pCfft = &arm_cfft_sR_q15_len2048;
#endif
break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
case 2048U:
S->twidCoefRModifier = 4U;
S->pCfft = &arm_cfft_sR_q15_len1024;
#if defined(ARM_MATH_MVEI)
status=arm_cfft_init_q15(&(S->cfftInst),1024);
if (status != ARM_MATH_SUCCESS)
{
return(status);
}
#else
S->pCfft = &arm_cfft_sR_q15_len1024;
#endif
break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
case 1024U:
S->twidCoefRModifier = 8U;
S->pCfft = &arm_cfft_sR_q15_len512;
#if defined(ARM_MATH_MVEI)
status=arm_cfft_init_q15(&(S->cfftInst),512);
if (status != ARM_MATH_SUCCESS)
{
return(status);
}
#else
S->pCfft = &arm_cfft_sR_q15_len512;
#endif
break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
case 512U:
S->twidCoefRModifier = 16U;
S->pCfft = &arm_cfft_sR_q15_len256;
#if defined(ARM_MATH_MVEI)
status=arm_cfft_init_q15(&(S->cfftInst),256);
if (status != ARM_MATH_SUCCESS)
{
return(status);
}
#else
S->pCfft = &arm_cfft_sR_q15_len256;
#endif
break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
case 256U:
S->twidCoefRModifier = 32U;
S->pCfft = &arm_cfft_sR_q15_len128;
#if defined(ARM_MATH_MVEI)
status=arm_cfft_init_q15(&(S->cfftInst),128);
if (status != ARM_MATH_SUCCESS)
{
return(status);
}
#else
S->pCfft = &arm_cfft_sR_q15_len128;
#endif
break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
case 128U:
S->twidCoefRModifier = 64U;
S->pCfft = &arm_cfft_sR_q15_len64;
#if defined(ARM_MATH_MVEI)
status=arm_cfft_init_q15(&(S->cfftInst),64);
if (status != ARM_MATH_SUCCESS)
{
return(status);
}
#else
S->pCfft = &arm_cfft_sR_q15_len64;
#endif
break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
case 64U:
S->twidCoefRModifier = 128U;
S->pCfft = &arm_cfft_sR_q15_len32;
#if defined(ARM_MATH_MVEI)
status=arm_cfft_init_q15(&(S->cfftInst),32);
if (status != ARM_MATH_SUCCESS)
{
return(status);
}
#else
S->pCfft = &arm_cfft_sR_q15_len32;
#endif
break;
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
case 32U:
S->twidCoefRModifier = 256U;
S->pCfft = &arm_cfft_sR_q15_len16;
#if defined(ARM_MATH_MVEI)
status=arm_cfft_init_q15(&(S->cfftInst),16);
if (status != ARM_MATH_SUCCESS)
{
return(status);
}
#else
S->pCfft = &arm_cfft_sR_q15_len16;
#endif
break;
#endif
default:

@ -75,7 +75,11 @@ void arm_rfft_q15(
q15_t * pSrc,
q15_t * pDst)
{
#if defined(ARM_MATH_MVEI)
const arm_cfft_instance_q15 *S_CFFT = &(S->cfftInst);
#else
const arm_cfft_instance_q15 *S_CFFT = S->pCfft;
#endif
uint32_t L2 = S->fftLenReal >> 1U;
uint32_t i;

@ -15,7 +15,7 @@ class TransformCQ15:public Client::Suite
Client::RefPattern<q15_t> ref;
const arm_cfft_instance_q15 *instCfftQ15;
arm_cfft_instance_q15 instCfftQ15;
int ifft;
@ -27,5 +27,7 @@ class TransformCQ15:public Client::Suite
*/
int scaling;
arm_status status;
};

@ -13,10 +13,10 @@
q15_t *outfftp = outputfft.ptr();
memcpy(outfftp,inp,sizeof(q15_t)*input.nbSamples());
memcpy(outfftp,inp,sizeof(q15_t)*input.nbSamples());
arm_cfft_q15(
this->instCfftQ15,
arm_cfft_q15(
&(this->instCfftQ15),
outfftp,
this->ifft,
1);
@ -37,7 +37,7 @@
memcpy(outfftp,inp,sizeof(q15_t)*input.nbSamples());
arm_cfft_q15(
this->instCfftQ15,
&(this->instCfftQ15),
outfftp,
this->ifft,
1);
@ -68,7 +68,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_NOISY_16_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_NOISY_16_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len16;
status=arm_cfft_init_q15(&instCfftQ15,16);
this->ifft=0;
@ -79,7 +79,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_16_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_16_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len16;
status=arm_cfft_init_q15(&instCfftQ15,16);
this->ifft=1;
this->scaling = 4;
@ -91,7 +91,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_NOISY_32_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_NOISY_32_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len32;
status=arm_cfft_init_q15(&instCfftQ15,32);
this->ifft=0;
@ -102,7 +102,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_32_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_32_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len32;
status=arm_cfft_init_q15(&instCfftQ15,32);
this->ifft=1;
this->scaling = 5;
@ -114,7 +114,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_NOISY_64_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_NOISY_64_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len64;
status=arm_cfft_init_q15(&instCfftQ15,64);
this->ifft=0;
@ -126,7 +126,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_64_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_64_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len64;
status=arm_cfft_init_q15(&instCfftQ15,64);
this->ifft=1;
this->scaling=6;
@ -138,7 +138,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_NOISY_128_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_NOISY_128_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len128;
status=arm_cfft_init_q15(&instCfftQ15,128);
this->ifft=0;
@ -149,7 +149,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_128_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_128_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len128;
status=arm_cfft_init_q15(&instCfftQ15,128);
this->ifft=1;
this->scaling=7;
@ -161,7 +161,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_NOISY_256_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_NOISY_256_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len256;
status=arm_cfft_init_q15(&instCfftQ15,256);
this->ifft=0;
@ -172,7 +172,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_256_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_256_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len256;
status=arm_cfft_init_q15(&instCfftQ15,256);
this->ifft=1;
this->scaling=8;
@ -184,7 +184,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_NOISY_512_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_NOISY_512_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len512;
status=arm_cfft_init_q15(&instCfftQ15,512);
this->ifft=0;
@ -196,7 +196,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_512_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_512_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len512;
status=arm_cfft_init_q15(&instCfftQ15,512);
this->ifft=1;
this->scaling=9;
@ -209,7 +209,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_NOISY_1024_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_NOISY_1024_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len1024;
status=arm_cfft_init_q15(&instCfftQ15,1024);
this->ifft=0;
@ -220,7 +220,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_1024_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_1024_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len1024;
status=arm_cfft_init_q15(&instCfftQ15,1024);
this->ifft=1;
this->scaling=10;
@ -232,7 +232,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_NOISY_2048_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_NOISY_2048_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len2048;
status=arm_cfft_init_q15(&instCfftQ15,2048);
this->ifft=0;
@ -243,7 +243,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_2048_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_2048_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len2048;
status=arm_cfft_init_q15(&instCfftQ15,2048);
this->ifft=1;
this->scaling=11;
@ -255,7 +255,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_NOISY_4096_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_NOISY_4096_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len4096;
status=arm_cfft_init_q15(&instCfftQ15,4096);
this->ifft=0;
@ -266,7 +266,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_NOISY_4096_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_NOISY_4096_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len4096;
status=arm_cfft_init_q15(&instCfftQ15,4096);
this->ifft=1;
this->scaling=12;
@ -280,7 +280,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_STEP_16_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_STEP_16_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len16;
status=arm_cfft_init_q15(&instCfftQ15,16);
this->ifft=0;
@ -291,7 +291,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_STEP_16_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_STEP_16_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len16;
status=arm_cfft_init_q15(&instCfftQ15,16);
this->ifft=1;
this->scaling=4;
@ -303,7 +303,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_STEP_32_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_STEP_32_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len32;
status=arm_cfft_init_q15(&instCfftQ15,32);
this->ifft=0;
@ -314,7 +314,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_STEP_32_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_STEP_32_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len32;
status=arm_cfft_init_q15(&instCfftQ15,32);
this->ifft=1;
this->scaling=5;
@ -326,7 +326,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_STEP_64_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_STEP_64_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len64;
status=arm_cfft_init_q15(&instCfftQ15,64);
this->ifft=0;
@ -337,7 +337,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_STEP_64_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_STEP_64_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len64;
status=arm_cfft_init_q15(&instCfftQ15,64);
this->ifft=1;
this->scaling=6;
@ -349,7 +349,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_STEP_128_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_STEP_128_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len128;
status=arm_cfft_init_q15(&instCfftQ15,128);
this->ifft=0;
@ -360,7 +360,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_STEP_128_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_STEP_128_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len128;
status=arm_cfft_init_q15(&instCfftQ15,128);
this->ifft=1;
this->scaling=7;
@ -372,7 +372,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_STEP_256_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_STEP_256_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len256;
status=arm_cfft_init_q15(&instCfftQ15,256);
this->ifft=0;
@ -383,7 +383,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_STEP_256_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_STEP_256_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len256;
status=arm_cfft_init_q15(&instCfftQ15,256);
this->ifft=1;
this->scaling=8;
@ -395,7 +395,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_STEP_512_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_STEP_512_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len512;
status=arm_cfft_init_q15(&instCfftQ15,512);
this->ifft=0;
@ -406,7 +406,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_STEP_512_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_STEP_512_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len512;
status=arm_cfft_init_q15(&instCfftQ15,512);
this->ifft=1;
this->scaling=9;
@ -418,7 +418,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_STEP_1024_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_STEP_1024_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len1024;
status=arm_cfft_init_q15(&instCfftQ15,1024);
this->ifft=0;
@ -429,7 +429,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_STEP_1024_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_STEP_1024_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len1024;
status=arm_cfft_init_q15(&instCfftQ15,1024);
this->ifft=1;
this->scaling=10;
@ -441,7 +441,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_STEP_2048_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_STEP_2048_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len2048;
status=arm_cfft_init_q15(&instCfftQ15,2048);
this->ifft=0;
@ -452,7 +452,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_STEP_2048_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_STEP_2048_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len2048;
status=arm_cfft_init_q15(&instCfftQ15,2048);
this->ifft=1;
this->scaling=11;
@ -464,7 +464,7 @@
input.reload(TransformCQ15::INPUTS_CFFT_STEP_4096_Q15_ID,mgr);
ref.reload( TransformCQ15::REF_CFFT_STEP_4096_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len4096;
status=arm_cfft_init_q15(&instCfftQ15,4096);
this->ifft=0;
@ -475,7 +475,7 @@
input.reload(TransformCQ15::INPUTS_CIFFT_STEP_4096_Q15_ID,mgr);
ref.reload( TransformCQ15::INPUTS_CFFT_STEP_4096_Q15_ID,mgr);
instCfftQ15 = &arm_cfft_sR_q15_len4096;
status=arm_cfft_init_q15(&instCfftQ15,4096);
this->ifft=1;
this->scaling=12;

Loading…
Cancel
Save