From b31c737ad0f30c8006e65d59facab6ea685aa374 Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Tue, 14 Apr 2020 11:25:33 +0200 Subject: [PATCH] CMSIS-DSP: Corrected latest issues in f16 MVE version of cfft. --- Source/TransformFunctions/arm_cfft_f16.c | 17 +++++++++-------- Source/TransformFunctions/arm_cfft_init_f16.c | 18 +++++++++--------- Testing/Source/Tests/TransformCF16.cpp | 2 +- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/Source/TransformFunctions/arm_cfft_f16.c b/Source/TransformFunctions/arm_cfft_f16.c index e0499dc8..729b94bd 100755 --- a/Source/TransformFunctions/arm_cfft_f16.c +++ b/Source/TransformFunctions/arm_cfft_f16.c @@ -29,6 +29,7 @@ #include "arm_math.h" #include "arm_common_tables.h" + #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" @@ -44,11 +45,11 @@ static float16_t arm_inverse_fft_length_f16(uint16_t fftLen) { case 4096U: - retValue = (float16_t)0.000244140625; + retValue = (float16_t)0.000244140625f; break; case 2048U: - retValue = (float16_t)0.00048828125; + retValue = (float16_t)0.00048828125f; break; case 1024U: @@ -56,7 +57,7 @@ static float16_t arm_inverse_fft_length_f16(uint16_t fftLen) break; case 512U: - retValue = (float16_t)0.001953125; + retValue = (float16_t)0.001953125f; break; case 256U: @@ -64,7 +65,7 @@ static float16_t arm_inverse_fft_length_f16(uint16_t fftLen) break; case 128U: - retValue = (float16_t)0.0078125; + retValue = (float16_t)0.0078125f; break; case 64U: @@ -72,7 +73,7 @@ static float16_t arm_inverse_fft_length_f16(uint16_t fftLen) break; case 32U: - retValue = (float16_t)0.03125; + retValue = (float16_t)0.03125f; break; case 16U: @@ -482,7 +483,7 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 * vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64); vecC = vldrwq_gather_base_f32(vecScGathAddr, 8); - blkCnt = (fftLen >> 3); + blkCnt = (fftLen >> 4); while (blkCnt > 0U) { vecSum0 = vecA + vecC; /* vecSum0 = vaddq(vecA, vecC) */ @@ -546,8 +547,8 @@ static void arm_cfft_radix4by2_inverse_f16_mve(const arm_cfft_instance_f16 * S,f vecTw = vld1q(pCoefVec); pCoefVec += 8; - vecSum = vecIn0 + vecIn1; - vecDiff = vecIn0 - vecIn1; + vecSum = vaddq(vecIn0, vecIn1); + vecDiff = vsubq(vecIn0, vecIn1); vecCmplxTmp = MVE_CMPLX_MULT_FLT_AxB(vecTw, vecDiff); diff --git a/Source/TransformFunctions/arm_cfft_init_f16.c b/Source/TransformFunctions/arm_cfft_init_f16.c index 2c2da03a..9252b128 100755 --- a/Source/TransformFunctions/arm_cfft_init_f16.c +++ b/Source/TransformFunctions/arm_cfft_init_f16.c @@ -163,7 +163,7 @@ arm_status arm_cfft_init_f16( /* Initialise the bit reversal table modifier */ S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH; S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_4096; - S->pTwiddle = (float16_t *)twiddleCoef_4096; + S->pTwiddle = (float16_t *)twiddleCoefF16_4096; status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 1); break; #endif @@ -174,7 +174,7 @@ arm_status arm_cfft_init_f16( /* Initialise the bit reversal table modifier */ S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH; S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_2048; - S->pTwiddle = (float16_t *)twiddleCoef_2048; + S->pTwiddle = (float16_t *)twiddleCoefF16_2048; status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 2); break; #endif @@ -185,7 +185,7 @@ arm_status arm_cfft_init_f16( /* Initialise the bit reversal table modifier */ S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH; S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_1024; - S->pTwiddle = (float16_t *)twiddleCoef_1024; + S->pTwiddle = (float16_t *)twiddleCoefF16_1024; status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 1); break; #endif @@ -196,7 +196,7 @@ arm_status arm_cfft_init_f16( /* Initialise the bit reversal table modifier */ S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH; S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_512; - S->pTwiddle = (float16_t *)twiddleCoef_512; + S->pTwiddle = (float16_t *)twiddleCoefF16_512; status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 2); break; #endif @@ -205,7 +205,7 @@ arm_status arm_cfft_init_f16( case 256U: S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH; S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_256; - S->pTwiddle = (float16_t *)twiddleCoef_256; + S->pTwiddle = (float16_t *)twiddleCoefF16_256; status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 1); break; #endif @@ -214,7 +214,7 @@ arm_status arm_cfft_init_f16( case 128U: S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH; S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_128; - S->pTwiddle = (float16_t *)twiddleCoef_128; + S->pTwiddle = (float16_t *)twiddleCoefF16_128; status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 2); break; #endif @@ -223,7 +223,7 @@ arm_status arm_cfft_init_f16( case 64U: S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH; S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_64; - S->pTwiddle = (float16_t *)twiddleCoef_64; + S->pTwiddle = (float16_t *)twiddleCoefF16_64; status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 1); break; #endif @@ -232,7 +232,7 @@ arm_status arm_cfft_init_f16( case 32U: S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH; S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_32; - S->pTwiddle = (float16_t *)twiddleCoef_32; + S->pTwiddle = (float16_t *)twiddleCoefF16_32; status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 2); break; #endif @@ -242,7 +242,7 @@ arm_status arm_cfft_init_f16( /* Initializations of structure parameters for 16 point FFT */ S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH; S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_16; - S->pTwiddle = (float16_t *)twiddleCoef_16; + S->pTwiddle = (float16_t *)twiddleCoefF16_16; status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 1); break; #endif diff --git a/Testing/Source/Tests/TransformCF16.cpp b/Testing/Source/Tests/TransformCF16.cpp index ea773a0d..ca726ece 100755 --- a/Testing/Source/Tests/TransformCF16.cpp +++ b/Testing/Source/Tests/TransformCF16.cpp @@ -5,7 +5,7 @@ #include "arm_const_structs.h" #include "Test.h" -#define SNR_THRESHOLD 60 +#define SNR_THRESHOLD 58 void TransformCF16::test_cfft_f16() {