From 9b6b4fff80e056abfbd412af8e634da0908143ec Mon Sep 17 00:00:00 2001
From: TTornblom
Date: Mon, 27 Sep 2021 10:39:19 +0200
Subject: [PATCH] DSP: Eliminate some non-standard C code

Cleaned out some non-standard C code: typeof() and the use of "const"
variables as constants. Also added some initial IAR support.

Signed-off-by: TTornblom
---
 Include/arm_math_types.h                        |  2 ++
 PrivateInclude/arm_vec_fft.h                    |  6 +++---
 Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c |  9 ++++++---
 Source/TransformFunctions/arm_cfft_q15.c        | 14 +++++++-------
 Source/TransformFunctions/arm_cfft_q31.c        | 16 ++++++++--------
 Source/TransformFunctions/arm_rfft_q15.c        |  8 ++++----
 Source/TransformFunctions/arm_rfft_q31.c        |  7 ++++---
 7 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/Include/arm_math_types.h b/Include/arm_math_types.h
index 7680eefe..017c8928 100755
--- a/Include/arm_math_types.h
+++ b/Include/arm_math_types.h
@@ -277,7 +277,9 @@ extern "C"
   /**
    * @brief 32-bit floating-point type definition.
    */
+#if !defined(__ICCARM__) || !(__ARM_FEATURE_MVE & 2)
   typedef float float32_t;
+#endif
 
   /**
    * @brief 64-bit floating-point type definition.
diff --git a/PrivateInclude/arm_vec_fft.h b/PrivateInclude/arm_vec_fft.h
index 30dcb0e2..17e62aa8 100755
--- a/PrivateInclude/arm_vec_fft.h
+++ b/PrivateInclude/arm_vec_fft.h
@@ -40,8 +40,8 @@ extern "C"
 #define MVE_CMPLX_MULT_FLT_AxB(A,B)          vcmlaq_rot90(vcmulq(A, B), A, B)
 #define MVE_CMPLX_MULT_FLT_Conj_AxB(A,B)     vcmlaq_rot270(vcmulq(A, B), A, B)
 
-#define MVE_CMPLX_MULT_FX_AxB(A,B)           vqdmladhxq(vqdmlsdhq((__typeof(A))vuninitializedq_s32(), A, B), A, B)
-#define MVE_CMPLX_MULT_FX_AxConjB(A,B)       vqdmladhq(vqdmlsdhxq((__typeof(A))vuninitializedq_s32(), A, B), A, B)
+#define MVE_CMPLX_MULT_FX_AxB(A,B,TyA)       vqdmladhxq(vqdmlsdhq((TyA)vuninitializedq_s32(), A, B), A, B)
+#define MVE_CMPLX_MULT_FX_AxConjB(A,B,TyA)   vqdmladhq(vqdmlsdhxq((TyA)vuninitializedq_s32(), A, B), A, B)
 
 #define MVE_CMPLX_ADD_FX_A_ixB(A, B)         vhcaddq_rot90(A,B)
 #define MVE_CMPLX_SUB_FX_A_ixB(A,B)          vhcaddq_rot270(A,B)
@@ -322,4 +322,4 @@ __STATIC_INLINE void arm_bitreversal_16_outpl_mve(void *pDst, void *pSrc, uint32
 
 #endif
 
-#endif /* _ARM_VEC_FFT_H_ */
\ No newline at end of file
+#endif /* _ARM_VEC_FFT_H_ */
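The arm_vec_fft.h hunk above is the core of the change: GNU C's __typeof extension let the old macros derive the cast type from their argument, but __typeof is not standard C and is rejected by compilers such as IAR's, so the macros now take the vector type as an explicit third parameter (TyA). Below is a minimal standalone sketch of the same pattern; untyped_raw() and TYPED_RAW() are hypothetical stand-ins for vuninitializedq_s32() and the patched macros, not CMSIS-DSP APIs.

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for vuninitializedq_s32(): returns a value whose type says
     * nothing about the type the caller actually wants. */
    static int32_t untyped_raw(void) { return 0; }

    /* GNU C only -- the cast type is derived from the macro argument:
     *   #define TYPED_RAW(A)       ((__typeof(A))untyped_raw())
     * Standard C -- the caller spells the type out, as in the patched macros: */
    #define TYPED_RAW(A, TyA)      ((TyA)untyped_raw())

    int main(void)
    {
        int16_t acc = 42;
        acc = TYPED_RAW(acc, int16_t);   /* type named at the call site */
        printf("%d\n", acc);             /* prints 0 */
        return 0;
    }

The float32_t guard in arm_math_types.h follows the same IAR theme: presumably IAR's own headers already provide float32_t when targeting an MVE configuration with the floating-point extension (__ARM_FEATURE_MVE & 2), so the library skips its typedef to avoid a conflicting definition.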
diff --git a/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c b/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
index 7b6b4883..f5666962 100755
--- a/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
+++ b/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
@@ -67,7 +67,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_2x2_mve(
     const arm_matrix_instance_f16 * pSrcB,
     arm_matrix_instance_f16 * pDst)
 {
-    const uint16_t MATRIX_DIM = 2;
+#define MATRIX_DIM 2
     float16_t const *pInB = pSrcB->pData;  /* input data matrix pointer B */
     float16_t *pInA = pSrcA->pData;        /* input data matrix pointer A */
     float16_t *pOut = pDst->pData;         /* output data matrix pointer */
@@ -131,6 +131,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_2x2_mve(
      * Return to application
      */
     return (ARM_MATH_SUCCESS);
+#undef MATRIX_DIM
 }
 
 
@@ -140,7 +141,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_3x3_mve(
     const arm_matrix_instance_f16 * pSrcB,
     arm_matrix_instance_f16 * pDst)
 {
-    const uint16_t MATRIX_DIM = 3;
+#define MATRIX_DIM 3
     float16_t const *pInB = pSrcB->pData;  /* input data matrix pointer B */
     float16_t *pInA = pSrcA->pData;        /* input data matrix pointer A */
     float16_t *pOut = pDst->pData;         /* output data matrix pointer */
@@ -226,6 +227,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_3x3_mve(
      * Return to application
      */
     return (ARM_MATH_SUCCESS);
+#undef MATRIX_DIM
 }
 
 
@@ -236,7 +238,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_4x4_mve(
     const arm_matrix_instance_f16 * pSrcB,
     arm_matrix_instance_f16 * pDst)
 {
-    const uint16_t MATRIX_DIM = 4;
+#define MATRIX_DIM 4
     float16_t const *pInB = pSrcB->pData;  /* input data matrix pointer B */
     float16_t *pInA = pSrcA->pData;        /* input data matrix pointer A */
     float16_t *pOut = pDst->pData;         /* output data matrix pointer */
@@ -371,6 +373,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_4x4_mve(
      * Return to application
      */
     return (ARM_MATH_SUCCESS);
+#undef MATRIX_DIM
 }
 
 
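The MATRIX_DIM rewrite in this file is the "const variables as constants" cleanup from the commit message. In C, unlike C++, a const-qualified variable is not an integer constant expression, so it cannot appear where the language requires a true constant. A standalone sketch of the pitfall follows (float stands in for float16_t so it compiles on any host; tying this to the commit message's motivation is an inference):

    #include <stdint.h>

    /* With a const variable, both declarations below are problems in C:
     *
     *   const uint16_t MATRIX_DIM = 2;
     *   static float tmp[MATRIX_DIM * MATRIX_DIM];  // error: size not a constant
     *   float buf[MATRIX_DIM * MATRIX_DIM];         // VLA; optional since C11
     *
     * The #define/#undef pair keeps the dimension a genuine constant
     * expression while still limiting its scope, one function at a time: */
    #define MATRIX_DIM 2
    static float tmp[MATRIX_DIM * MATRIX_DIM];       /* fixed-size array of 4 */
    #undef MATRIX_DIM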
diff --git a/Source/TransformFunctions/arm_cfft_q15.c b/Source/TransformFunctions/arm_cfft_q15.c
index 9d4eb96c..69211c43 100644
--- a/Source/TransformFunctions/arm_cfft_q15.c
+++ b/Source/TransformFunctions/arm_cfft_q15.c
@@ -112,7 +112,7 @@ static void _arm_radix4_butterfly_q15_mve(
          */
         vecW = vld1q(pW2);
         pW2 += 8;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
         vst1q(inB, vecTmp1);
         inB += 8;
 
@@ -125,7 +125,7 @@ static void _arm_radix4_butterfly_q15_mve(
          */
         vecW = vld1q(pW1);
         pW1 += 8;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
         vst1q(inC, vecTmp1);
         inC += 8;
 
@@ -138,7 +138,7 @@ static void _arm_radix4_butterfly_q15_mve(
          */
         vecW = vld1q(pW3);
         pW3 += 8;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
         vst1q(inD, vecTmp1);
         inD += 8;
 
@@ -234,7 +234,7 @@ static void arm_cfft_radix4by2_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pS
         pCoefVec += 8;
 
         vecDiff = vhsubq(vecIn0, vecIn1);
-        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw);
+        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw, q15x8_t);
         vst1q(pIn1, vecCmplxTmp);
         pIn1 += 8;
 
@@ -348,7 +348,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
          */
         vecW = vld1q(pW2);
         pW2 += 8;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t);
         vst1q(inB, vecTmp1);
         inB += 8;
 
@@ -361,7 +361,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
          */
         vecW = vld1q(pW1);
         pW1 += 8;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t);
         vst1q(inC, vecTmp1);
         inC += 8;
         /*
@@ -373,7 +373,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
          */
         vecW = vld1q(pW3);
         pW3 += 8;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t);
         vst1q(inD, vecTmp1);
         inD += 8;
 
diff --git a/Source/TransformFunctions/arm_cfft_q31.c b/Source/TransformFunctions/arm_cfft_q31.c
index a26927e3..7f964b82 100644
--- a/Source/TransformFunctions/arm_cfft_q31.c
+++ b/Source/TransformFunctions/arm_cfft_q31.c
@@ -115,7 +115,7 @@ static void _arm_radix4_butterfly_q31_mve(
          */
         vecW = vld1q(pW2);
         pW2 += 4;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
         vst1q(inB, vecTmp1);
         inB += 4;
 
@@ -128,7 +128,7 @@ static void _arm_radix4_butterfly_q31_mve(
          */
         vecW = vld1q(pW1);
         pW1 += 4;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
         vst1q(inC, vecTmp1);
         inC += 4;
         /*
@@ -140,7 +140,7 @@ static void _arm_radix4_butterfly_q31_mve(
          */
         vecW = vld1q(pW3);
         pW3 += 4;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
         vst1q(inD, vecTmp1);
         inD += 4;
 
@@ -248,7 +248,7 @@ static void arm_cfft_radix4by2_q31_mve(const arm_cfft_instance_q31 *S, q31_t *pS
         pCoef += 4;
 
         vecDiff = vhsubq(vecIn0, vecIn1);
-        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw);
+        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw, q31x4_t);
         vst1q(pIn1, vecCmplxTmp);
         pIn1 += 4;
 
@@ -365,7 +365,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
          */
         vecW = vld1q(pW2);
         pW2 += 4;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
         vst1q(inB, vecTmp1);
         inB += 4;
 
@@ -378,7 +378,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
          */
         vecW = vld1q(pW1);
         pW1 += 4;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
         vst1q(inC, vecTmp1);
         inC += 4;
         /*
@@ -390,7 +390,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
          */
         vecW = vld1q(pW3);
         pW3 += 4;
-        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+        vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
         vst1q(inD, vecTmp1);
         inD += 4;
 
@@ -503,7 +503,7 @@ static void arm_cfft_radix4by2_inverse_q31_mve(const arm_cfft_instance_q31 *S, q
         pCoef += 4;
 
         vecDiff = vhsubq(vecIn0, vecIn1);
-        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxB(vecDiff, vecTw);
+        vecCmplxTmp = MVE_CMPLX_MULT_FX_AxB(vecDiff, vecTw, q31x4_t);
         vst1q(pIn1, vecCmplxTmp);
         pIn1 += 4;
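When reviewing the q15/q31 call sites above, it may help to see what the fixed-point complex multiply computes. Below is a scalar Q31 reference model; it is illustrative only, since the vqdmlsdhq/vqdmladhxq pair behind MVE_CMPLX_MULT_FX_AxB saturates and processes all vector lanes at once, and the AxConjB variant conjugates one operand (negating its imaginary part) before the same arithmetic.

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { int32_t re; int32_t im; } cq31_t;

    /* Q31 x Q31 -> Q31, truncating; the real intrinsics saturate instead. */
    static int32_t q31_mul(int32_t a, int32_t b)
    {
        return (int32_t)(((int64_t)a * (int64_t)b) >> 31);
    }

    /* Scalar model of MVE_CMPLX_MULT_FX_AxB: an ordinary complex product. */
    static cq31_t cmplx_mult_q31(cq31_t a, cq31_t b)
    {
        cq31_t r;
        r.re = q31_mul(a.re, b.re) - q31_mul(a.im, b.im);
        r.im = q31_mul(a.re, b.im) + q31_mul(a.im, b.re);
        return r;
    }

    int main(void)
    {
        cq31_t a = { 0x40000000, 0 };   /* 0.5 + 0.0i */
        cq31_t b = { 0, 0x40000000 };   /* 0.0 + 0.5i */
        cq31_t r = cmplx_mult_q31(a, b);
        printf("%ld %ld\n", (long)r.re, (long)r.im);   /* 0 536870912 = 0 + 0.25i */
        return 0;
    }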
diff --git a/Source/TransformFunctions/arm_rfft_q15.c b/Source/TransformFunctions/arm_rfft_q15.c
index f7086bec..7d149c60 100644
--- a/Source/TransformFunctions/arm_rfft_q15.c
+++ b/Source/TransformFunctions/arm_rfft_q15.c
@@ -188,8 +188,8 @@ void arm_split_rfft_q15(
         q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB_S16(in1, coefA),
                                  MVE_CMPLX_MULT_FX_AxConjB_S16(coefB, in2));
 #else
-        q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB(in1, coefA),
-                                 MVE_CMPLX_MULT_FX_AxConjB(coefB, in2));
+        q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q15x8_t),
+                                 MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q15x8_t));
 #endif
         vst1q_s16(pOut1, out);
         pOut1 += 8;
@@ -413,8 +413,8 @@ void arm_split_rifft_q15(
         q15x8_t coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef);
 
         /* can we avoid the conjugate here ? */
-        q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA),
-                                 vmulq(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB)));
+        q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA, q15x8_t),
+                                 vmulq(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB, q15x8_t)));
 
         vst1q_s16(pDst, out);
         pDst += 8;
diff --git a/Source/TransformFunctions/arm_rfft_q31.c b/Source/TransformFunctions/arm_rfft_q31.c
index 9f57011c..ad3212db 100644
--- a/Source/TransformFunctions/arm_rfft_q31.c
+++ b/Source/TransformFunctions/arm_rfft_q31.c
@@ -181,7 +181,8 @@ void arm_split_rfft_q31(
 #if defined(__CMSIS_GCC_H)
         q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB_S32(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB_S32(coefB, in2));
 #else
-        q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB(coefB, in2));
+        q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q31x4_t),
+                                 MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q31x4_t));
 #endif
         vst1q(pOut1, out);
         pOut1 += 4;
@@ -340,8 +341,8 @@ void arm_split_rifft_q31(
         q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB_S32(in1, coefA),
                                  vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB_S32(in2, coefB)));
 #else
-        q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA),
-                                 vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB)));
+        q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA, q31x4_t),
+                                 vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB, q31x4_t)));
 #endif
         vst1q_s32(pDst, out);
         pDst += 4;
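A closing note on the vhaddq_s16/vhaddq_s32 calls that combine the two complex products in the split real FFT stages above: these are halving adds, where each lane computes (a + b) >> 1, so two near-full-scale fixed-point products can be summed without overflowing the lane width. A scalar sketch, assuming only that per-lane behavior:

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar model of one vhaddq_s16 lane: widen, add, halve. */
    static int16_t hadd_q15(int16_t a, int16_t b)
    {
        return (int16_t)(((int32_t)a + (int32_t)b) >> 1);
    }

    int main(void)
    {
        /* A plain int16_t addition of these operands would overflow;
         * the halving add keeps the result in range. */
        printf("%d\n", hadd_q15(32767, 32766));   /* prints 32766 */
        return 0;
    }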