DSP: Eliminate some non-standard C code

Cleaned out some non-standard C constructs: typeof() and the use of
"const"-qualified variables as compile-time constants.

Also added some initial IAR support.

Signed-off-by: TTornblom <thomas.tornblom@iar.com>
TTornblom 4 years ago committed by Christophe Favergeon
parent 8092cc61f9
commit 9b6b4fff80
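For context, a minimal sketch (not part of the commit) of the "const" problem being fixed: in standard C, unlike C++, a const-qualified variable is not a constant expression, so using it as an array dimension produces a VLA, which stricter toolchains such as IAR can reject. A macro is a genuine compile-time constant:

#include <stdint.h>

void before(void)
{
    const uint16_t MATRIX_DIM = 2;        /* not a constant expression in C */
    float a[MATRIX_DIM * MATRIX_DIM];     /* VLA: non-portable in strict C */
    (void)a;
}

void after(void)
{
#define MATRIX_DIM 2                      /* true compile-time constant */
    float a[MATRIX_DIM * MATRIX_DIM];     /* plain fixed-size array */
    (void)a;
#undef MATRIX_DIM
}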

@@ -277,7 +277,9 @@ extern "C"
 /**
 * @brief 32-bit floating-point type definition.
 */
+#if !defined(__ICCARM__) || !(__ARM_FEATURE_MVE & 2)
 typedef float float32_t;
+#endif
 /**
 * @brief 64-bit floating-point type definition.
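A note on the guard above (my reading, not stated in the commit): ACLE's __ARM_FEATURE_MVE is a bit mask, bit 0 for integer MVE and bit 1 for floating-point MVE, and when IAR targets floating-point MVE its <arm_mve.h> presumably supplies float32_t itself, so CMSIS skips its own typedef to avoid a conflicting redefinition. A minimal sketch of testing the mask:

/* sketch: selecting code on the ACLE MVE feature bits */
#if defined(__ARM_FEATURE_MVE) && (__ARM_FEATURE_MVE & 2)
    /* integer + floating-point MVE: vector float types available */
#elif defined(__ARM_FEATURE_MVE) && (__ARM_FEATURE_MVE & 1)
    /* integer MVE only */
#endif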

@@ -40,8 +40,8 @@ extern "C"
 #define MVE_CMPLX_MULT_FLT_AxB(A,B) vcmlaq_rot90(vcmulq(A, B), A, B)
 #define MVE_CMPLX_MULT_FLT_Conj_AxB(A,B) vcmlaq_rot270(vcmulq(A, B), A, B)
-#define MVE_CMPLX_MULT_FX_AxB(A,B) vqdmladhxq(vqdmlsdhq((__typeof(A))vuninitializedq_s32(), A, B), A, B)
-#define MVE_CMPLX_MULT_FX_AxConjB(A,B) vqdmladhq(vqdmlsdhxq((__typeof(A))vuninitializedq_s32(), A, B), A, B)
+#define MVE_CMPLX_MULT_FX_AxB(A,B,TyA) vqdmladhxq(vqdmlsdhq((TyA)vuninitializedq_s32(), A, B), A, B)
+#define MVE_CMPLX_MULT_FX_AxConjB(A,B,TyA) vqdmladhq(vqdmlsdhxq((TyA)vuninitializedq_s32(), A, B), A, B)
 #define MVE_CMPLX_ADD_FX_A_ixB(A, B) vhcaddq_rot90(A,B)
 #define MVE_CMPLX_SUB_FX_A_ixB(A,B) vhcaddq_rot270(A,B)
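To illustrate the new calling convention (a sketch, not taken from the diff): with the non-standard __typeof(A) gone, the caller supplies the accumulator's vector type explicitly as the extra TyA argument.

#include <arm_mve.h>

/* new form of the macro, as introduced in the hunk above */
#define MVE_CMPLX_MULT_FX_AxB(A,B,TyA) \
    vqdmladhxq(vqdmlsdhq((TyA)vuninitializedq_s32(), A, B), A, B)

/* q15x8_t in CMSIS is an alias of the ACLE type int16x8_t */
int16x8_t cmplx_mult_q15(int16x8_t a, int16x8_t b)
{
    /* the caller names the vector type instead of relying on __typeof(a) */
    return MVE_CMPLX_MULT_FX_AxB(a, b, int16x8_t);
}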

@@ -67,7 +67,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_2x2_mve(
 const arm_matrix_instance_f16 * pSrcB,
 arm_matrix_instance_f16 * pDst)
 {
-const uint16_t MATRIX_DIM = 2;
+#define MATRIX_DIM 2
 float16_t const *pInB = pSrcB->pData; /* input data matrix pointer B */
 float16_t *pInA = pSrcA->pData; /* input data matrix pointer A */
 float16_t *pOut = pDst->pData; /* output data matrix pointer */
@@ -131,6 +131,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_2x2_mve(
 * Return to application
 */
 return (ARM_MATH_SUCCESS);
+#undef MATRIX_DIM
 }
@@ -140,7 +141,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_3x3_mve(
 const arm_matrix_instance_f16 * pSrcB,
 arm_matrix_instance_f16 * pDst)
 {
-const uint16_t MATRIX_DIM = 3;
+#define MATRIX_DIM 3
 float16_t const *pInB = pSrcB->pData; /* input data matrix pointer B */
 float16_t *pInA = pSrcA->pData; /* input data matrix pointer A */
 float16_t *pOut = pDst->pData; /* output data matrix pointer */
@@ -226,6 +227,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_3x3_mve(
 * Return to application
 */
 return (ARM_MATH_SUCCESS);
+#undef MATRIX_DIM
 }
@@ -236,7 +238,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_4x4_mve(
 const arm_matrix_instance_f16 * pSrcB,
 arm_matrix_instance_f16 * pDst)
 {
-const uint16_t MATRIX_DIM = 4;
+#define MATRIX_DIM 4
 float16_t const *pInB = pSrcB->pData; /* input data matrix pointer B */
 float16_t *pInA = pSrcA->pData; /* input data matrix pointer A */
 float16_t *pOut = pDst->pData; /* output data matrix pointer */
@@ -371,6 +373,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_cmplx_mult_f16_4x4_mve(
 * Return to application
 */
 return (ARM_MATH_SUCCESS);
+#undef MATRIX_DIM
 }
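Since the 2x2, 3x3, and 4x4 specializations live in the same translation unit, each function defines MATRIX_DIM on entry and #undefs it on exit, keeping the macro effectively local; schematically (hypothetical helpers, not the real code):

static int dim_squared_2(void)
{
#define MATRIX_DIM 2
    return MATRIX_DIM * MATRIX_DIM;   /* 4 */
#undef MATRIX_DIM                     /* keep the macro local to this function */
}

static int dim_squared_3(void)
{
#define MATRIX_DIM 3                  /* no clash: previous definition was undef'd */
    return MATRIX_DIM * MATRIX_DIM;   /* 9 */
#undef MATRIX_DIM
}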

@@ -112,7 +112,7 @@ static void _arm_radix4_butterfly_q15_mve(
 */
 vecW = vld1q(pW2);
 pW2 += 8;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
 vst1q(inB, vecTmp1);
 inB += 8;
@@ -125,7 +125,7 @@ static void _arm_radix4_butterfly_q15_mve(
 */
 vecW = vld1q(pW1);
 pW1 += 8;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
 vst1q(inC, vecTmp1);
 inC += 8;
@@ -138,7 +138,7 @@ static void _arm_radix4_butterfly_q15_mve(
 */
 vecW = vld1q(pW3);
 pW3 += 8;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
 vst1q(inD, vecTmp1);
 inD += 8;
@@ -234,7 +234,7 @@ static void arm_cfft_radix4by2_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pS
 pCoefVec += 8;
 vecDiff = vhsubq(vecIn0, vecIn1);
-vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw);
+vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw, q15x8_t);
 vst1q(pIn1, vecCmplxTmp);
 pIn1 += 8;
@@ -348,7 +348,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
 */
 vecW = vld1q(pW2);
 pW2 += 8;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t);
 vst1q(inB, vecTmp1);
 inB += 8;
@@ -361,7 +361,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
 */
 vecW = vld1q(pW1);
 pW1 += 8;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t);
 vst1q(inC, vecTmp1);
 inC += 8;
 /*
@@ -373,7 +373,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
 */
 vecW = vld1q(pW3);
 pW3 += 8;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t);
 vst1q(inD, vecTmp1);
 inD += 8;
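A side note on the type arguments (my addition, not from the commit): a 128-bit MVE vector holds eight q15 lanes or four q31 lanes, which is why the q15 paths above pass q15x8_t and step pointers by 8, while the q31 paths below pass q31x4_t and step by 4. The CMSIS names are plain aliases of the ACLE vector types:

#include <arm_mve.h>

typedef int16x8_t q15x8_t;   /* as in CMSIS: 8 x 16-bit fixed-point lanes */
typedef int32x4_t q31x4_t;   /* as in CMSIS: 4 x 32-bit fixed-point lanes */

_Static_assert(sizeof(q15x8_t) == 16, "one 128-bit MVE vector");
_Static_assert(sizeof(q31x4_t) == 16, "one 128-bit MVE vector");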

@@ -115,7 +115,7 @@ static void _arm_radix4_butterfly_q31_mve(
 */
 vecW = vld1q(pW2);
 pW2 += 4;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
 vst1q(inB, vecTmp1);
 inB += 4;
@@ -128,7 +128,7 @@ static void _arm_radix4_butterfly_q31_mve(
 */
 vecW = vld1q(pW1);
 pW1 += 4;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
 vst1q(inC, vecTmp1);
 inC += 4;
 /*
@@ -140,7 +140,7 @@ static void _arm_radix4_butterfly_q31_mve(
 */
 vecW = vld1q(pW3);
 pW3 += 4;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
 vst1q(inD, vecTmp1);
 inD += 4;
@@ -248,7 +248,7 @@ static void arm_cfft_radix4by2_q31_mve(const arm_cfft_instance_q31 *S, q31_t *pS
 pCoef += 4;
 vecDiff = vhsubq(vecIn0, vecIn1);
-vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw);
+vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw, q31x4_t);
 vst1q(pIn1, vecCmplxTmp);
 pIn1 += 4;
@@ -365,7 +365,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
 */
 vecW = vld1q(pW2);
 pW2 += 4;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
 vst1q(inB, vecTmp1);
 inB += 4;
@@ -378,7 +378,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
 */
 vecW = vld1q(pW1);
 pW1 += 4;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
 vst1q(inC, vecTmp1);
 inC += 4;
 /*
@@ -390,7 +390,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
 */
 vecW = vld1q(pW3);
 pW3 += 4;
-vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW);
+vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
 vst1q(inD, vecTmp1);
 inD += 4;
@@ -503,7 +503,7 @@ static void arm_cfft_radix4by2_inverse_q31_mve(const arm_cfft_instance_q31 *S, q
 pCoef += 4;
 vecDiff = vhsubq(vecIn0, vecIn1);
-vecCmplxTmp = MVE_CMPLX_MULT_FX_AxB(vecDiff, vecTw);
+vecCmplxTmp = MVE_CMPLX_MULT_FX_AxB(vecDiff, vecTw, q31x4_t);
 vst1q(pIn1, vecCmplxTmp);
 pIn1 += 4;

@@ -188,8 +188,8 @@ void arm_split_rfft_q15(
 q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB_S16(in1, coefA),
 MVE_CMPLX_MULT_FX_AxConjB_S16(coefB, in2));
 #else
-q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB(in1, coefA),
-MVE_CMPLX_MULT_FX_AxConjB(coefB, in2));
+q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q15x8_t),
+MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q15x8_t));
 #endif
 vst1q_s16(pOut1, out);
 pOut1 += 8;
@@ -413,8 +413,8 @@ void arm_split_rifft_q15(
 q15x8_t coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef);
 /* can we avoid the conjugate here ? */
-q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA),
-vmulq(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB)));
+q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA, q15x8_t),
+vmulq(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB, q15x8_t)));
 vst1q_s16(pDst, out);
 pDst += 8;

@@ -181,7 +181,8 @@ void arm_split_rfft_q31(
 #if defined(__CMSIS_GCC_H)
 q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB_S32(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB_S32(coefB, in2));
 #else
-q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB(coefB, in2));
+q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q31x4_t),
+MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q31x4_t));
 #endif
 vst1q(pOut1, out);
 pOut1 += 4;
@@ -340,8 +341,8 @@ void arm_split_rifft_q31(
 q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB_S32(in1, coefA),
 vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB_S32(in2, coefB)));
 #else
-q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA),
-vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB)));
+q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA, q31x4_t),
+vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB, q31x4_t)));
 #endif
 vst1q_s32(pDst, out);
 pDst += 4;
