From ac7da660b7d08d1567939277208a7b17c0afe7e4 Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Mon, 3 May 2021 12:39:43 +0200 Subject: [PATCH] CMSIS-DSP: Remove type punning in vector code and some compilation warnings. --- Source/DistanceFunctions/arm_minkowski_distance_f16.c | 3 +-- Source/DistanceFunctions/arm_minkowski_distance_f32.c | 3 +-- Source/FilteringFunctions/arm_fir_interpolate_f32.c | 6 +++--- Source/FilteringFunctions/arm_fir_interpolate_q31.c | 2 +- Source/FilteringFunctions/arm_levinson_durbin_q31.c | 1 + Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c | 4 +--- Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c | 4 +--- Source/MatrixFunctions/arm_mat_mult_q15.c | 4 +--- Source/MatrixFunctions/arm_mat_mult_q31.c | 4 +--- Source/TransformFunctions/arm_cfft_f16.c | 4 ++-- Source/TransformFunctions/arm_cfft_f32.c | 4 ++-- Source/TransformFunctions/arm_cfft_q15.c | 4 ++-- Source/TransformFunctions/arm_cfft_q31.c | 4 ++-- Testing/Source/Tests/FastMathQ15.cpp | 2 ++ 14 files changed, 21 insertions(+), 28 deletions(-) diff --git a/Source/DistanceFunctions/arm_minkowski_distance_f16.c b/Source/DistanceFunctions/arm_minkowski_distance_f16.c index 1848f952..9fa13908 100755 --- a/Source/DistanceFunctions/arm_minkowski_distance_f16.c +++ b/Source/DistanceFunctions/arm_minkowski_distance_f16.c @@ -69,10 +69,9 @@ float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize) { uint32_t blkCnt; - f16x8_t a, b, tmpV, accumV, sumV; + f16x8_t a, b, tmpV, sumV; sumV = vdupq_n_f16(0.0f); - accumV = vdupq_n_f16(0.0f); blkCnt = blockSize >> 3; while (blkCnt > 0U) { diff --git a/Source/DistanceFunctions/arm_minkowski_distance_f32.c b/Source/DistanceFunctions/arm_minkowski_distance_f32.c index 9f81dca2..b881fc1f 100755 --- a/Source/DistanceFunctions/arm_minkowski_distance_f32.c +++ b/Source/DistanceFunctions/arm_minkowski_distance_f32.c @@ -76,10 +76,9 @@ __attribute__((weak)) float __powisf2(float a, int b) float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize) { uint32_t blkCnt; - f32x4_t a, b, tmpV, accumV, sumV; + f32x4_t a, b, tmpV, sumV; sumV = vdupq_n_f32(0.0f); - accumV = vdupq_n_f32(0.0f); blkCnt = blockSize >> 2; while (blkCnt > 0U) { diff --git a/Source/FilteringFunctions/arm_fir_interpolate_f32.c b/Source/FilteringFunctions/arm_fir_interpolate_f32.c index 95ea0b08..b6a6ecb7 100644 --- a/Source/FilteringFunctions/arm_fir_interpolate_f32.c +++ b/Source/FilteringFunctions/arm_fir_interpolate_f32.c @@ -147,7 +147,7 @@ static void arm_fir_interpolate2_f32_mve( uint32_t blkCnt; /* Loop counters */ uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */ uint32_t strides[4] = { 0, 1 * 2, 2 * 2, 3 * 2 }; - uint32x4_t vec_strides0 = *(uint32x4_t *) strides; + uint32x4_t vec_strides0 = vld1q_u32(strides); uint32x4_t vec_strides1 = vec_strides0 + 1; f32x4_t acc0, acc1; @@ -271,8 +271,8 @@ void arm_fir_interpolate_f32( uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */ uint32_t strides[4] = { 0, 1 * S->L, 2 * S->L, 3 * S->L }; uint32_t stridesM[4] = { 4, 3, 2, 1 }; - uint32x4_t vec_stridesM = *(uint32x4_t *) stridesM; - uint32x4_t vec_strides = *(uint32x4_t *) strides; + uint32x4_t vec_stridesM = vld1q_u32(stridesM); + uint32x4_t vec_strides = vld1q_u32(strides); f32x4_t acc; diff --git a/Source/FilteringFunctions/arm_fir_interpolate_q31.c b/Source/FilteringFunctions/arm_fir_interpolate_q31.c index a38773f6..ea217603 100644 --- a/Source/FilteringFunctions/arm_fir_interpolate_q31.c +++ b/Source/FilteringFunctions/arm_fir_interpolate_q31.c @@ -71,7 +71,7 @@ void arm_fir_interpolate_q31( uint32_t i, blkCnt; /* Loop counters */ uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */ uint32_t strides[4] = { 0, 1 * S->L, 2 * S->L, 3 * S->L }; - uint32x4_t vec_strides0 = *(uint32x4_t *) strides; + uint32x4_t vec_strides0 = vld1q_u32(strides); uint32x4_t vec_strides1 = vec_strides0 + 1; uint32x4_t vec_strides2 = vec_strides0 + 2; uint32x4_t vec_strides3 = vec_strides0 + 3; diff --git a/Source/FilteringFunctions/arm_levinson_durbin_q31.c b/Source/FilteringFunctions/arm_levinson_durbin_q31.c index c25c438a..a226f023 100755 --- a/Source/FilteringFunctions/arm_levinson_durbin_q31.c +++ b/Source/FilteringFunctions/arm_levinson_durbin_q31.c @@ -75,6 +75,7 @@ __STATIC_FORCEINLINE q31_t divide(q31_t n, q31_t d) // Result is in Q14 because of use of HALF_Q15 instead of ONE_Q15. status=arm_divide_q15(HALF_Q15,d>>16,&inverse,&shift); + (void)status; // d is used instead of l // So we will need to substract to 2 instead of 1. diff --git a/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c b/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c index cd870c3d..5add938b 100644 --- a/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c +++ b/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c @@ -532,7 +532,7 @@ arm_status arm_mat_cmplx_mult_f32( uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ - uint16_t col, i = 0U, row = numRowsA, colCnt; /* loop counters */ + uint16_t col, i = 0U, row = numRowsA; /* loop counters */ arm_status status; /* status of matrix multiplication */ uint32x4_t vecOffs, vecColBOffs; uint32_t blkCnt, rowCnt; /* loop counters */ @@ -611,7 +611,6 @@ arm_status arm_mat_cmplx_mult_f32( /* * Matrix A columns number of MAC operations are to be performed */ - colCnt = numColsA; float32_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec; float32_t const *pInA0 = pInA; @@ -752,7 +751,6 @@ arm_status arm_mat_cmplx_mult_f32( /* * Matrix A columns number of MAC operations are to be performed */ - colCnt = numColsA; float32_t const *pSrcA0Vec; float32_t const *pInA0 = pInA; diff --git a/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c b/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c index 8f6f1f28..ee784a6b 100644 --- a/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c +++ b/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c @@ -526,7 +526,7 @@ arm_status arm_mat_cmplx_mult_q31( uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ - uint16_t col, i = 0U, row = numRowsA, colCnt; /* loop counters */ + uint16_t col, i = 0U, row = numRowsA; /* loop counters */ arm_status status; /* status of matrix multiplication */ uint32x4_t vecOffs, vecColBOffs; uint32_t blkCnt, rowCnt; /* loop counters */ @@ -611,7 +611,6 @@ arm_status arm_mat_cmplx_mult_q31( /* * Matrix A columns number of MAC operations are to be performed */ - colCnt = numColsA; q31_t const *pSrcA0Vec, *pSrcA1Vec; q31_t const *pInA0 = pInA; @@ -742,7 +741,6 @@ arm_status arm_mat_cmplx_mult_q31( /* * Matrix A columns number of MAC operations are to be performed */ - colCnt = numColsA; q31_t const *pSrcA0Vec; q31_t const *pInA0 = pInA; diff --git a/Source/MatrixFunctions/arm_mat_mult_q15.c b/Source/MatrixFunctions/arm_mat_mult_q15.c index 7cb20dac..8eed6ee5 100644 --- a/Source/MatrixFunctions/arm_mat_mult_q15.c +++ b/Source/MatrixFunctions/arm_mat_mult_q15.c @@ -328,7 +328,7 @@ arm_status arm_mat_mult_q15( uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ - uint16_t col, i = 0U, row = numRowsA, colCnt; /* loop counters */ + uint16_t col, i = 0U, row = numRowsA; /* loop counters */ uint16x8_t vecOffs, vecColBOffs; uint32_t blkCnt,rowCnt; /* loop counters */ arm_status status; /* Status of matrix multiplication */ @@ -403,7 +403,6 @@ arm_status arm_mat_mult_q15( /* * Matrix A columns number of MAC operations are to be performed */ - colCnt = numColsA; q15_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec; q15_t *pInA0 = pInA; @@ -519,7 +518,6 @@ arm_status arm_mat_mult_q15( /* * Matrix A columns number of MAC operations are to be performed */ - colCnt = numColsA; q15_t const *pSrcA0Vec; q15_t *pInA0 = pInA; diff --git a/Source/MatrixFunctions/arm_mat_mult_q31.c b/Source/MatrixFunctions/arm_mat_mult_q31.c index 891281a6..18738279 100644 --- a/Source/MatrixFunctions/arm_mat_mult_q31.c +++ b/Source/MatrixFunctions/arm_mat_mult_q31.c @@ -344,7 +344,7 @@ arm_status arm_mat_mult_q31( uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ - uint16_t col, i = 0U, row = numRowsA, colCnt; /* loop counters */ + uint16_t col, i = 0U, row = numRowsA; /* loop counters */ arm_status status; /* status of matrix multiplication */ uint32x4_t vecOffs, vecColBOffs; uint32_t blkCnt, rowCnt; /* loop counters */ @@ -418,7 +418,6 @@ arm_status arm_mat_mult_q31( /* * Matrix A columns number of MAC operations are to be performed */ - colCnt = numColsA; q31_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec; q31_t const *pInA0 = pInA; @@ -541,7 +540,6 @@ arm_status arm_mat_mult_q31( /* * Matrix A columns number of MAC operations are to be performed */ - colCnt = numColsA; q31_t const *pSrcA0Vec; q31_t const *pInA0 = pInA; diff --git a/Source/TransformFunctions/arm_cfft_f16.c b/Source/TransformFunctions/arm_cfft_f16.c index 9262a346..239e0031 100755 --- a/Source/TransformFunctions/arm_cfft_f16.c +++ b/Source/TransformFunctions/arm_cfft_f16.c @@ -208,7 +208,7 @@ static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float1 /* * start of Last stage process */ - uint32x4_t vecScGathAddr = *(uint32x4_t *) strides; + uint32x4_t vecScGathAddr = vld1q_u32(strides); vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; /* load scheduling */ @@ -414,7 +414,7 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 * /* * start of Last stage process */ - uint32x4_t vecScGathAddr = *(uint32x4_t *) strides; + uint32x4_t vecScGathAddr = vld1q_u32(strides); vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; /* diff --git a/Source/TransformFunctions/arm_cfft_f32.c b/Source/TransformFunctions/arm_cfft_f32.c index 519d6420..f7f1fde1 100755 --- a/Source/TransformFunctions/arm_cfft_f32.c +++ b/Source/TransformFunctions/arm_cfft_f32.c @@ -210,7 +210,7 @@ static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float3 /* * start of Last stage process */ - uint32x4_t vecScGathAddr = *(uint32x4_t *) strides; + uint32x4_t vecScGathAddr = vld1q_u32(strides); vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; /* load scheduling */ @@ -416,7 +416,7 @@ static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 * /* * start of Last stage process */ - uint32x4_t vecScGathAddr = *(uint32x4_t *) strides; + uint32x4_t vecScGathAddr = vld1q_u32 (strides); vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; /* diff --git a/Source/TransformFunctions/arm_cfft_q15.c b/Source/TransformFunctions/arm_cfft_q15.c index c3ff6277..9d4eb96c 100644 --- a/Source/TransformFunctions/arm_cfft_q15.c +++ b/Source/TransformFunctions/arm_cfft_q15.c @@ -157,7 +157,7 @@ static void _arm_radix4_butterfly_q15_mve( /* * start of Last stage process */ - uint32x4_t vecScGathAddr = *(uint32x4_t *) strides; + uint32x4_t vecScGathAddr = vld1q_u32 (strides); vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; /* @@ -392,7 +392,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S /* * start of Last stage process */ - uint32x4_t vecScGathAddr = *(uint32x4_t *) strides; + uint32x4_t vecScGathAddr = vld1q_u32(strides); vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; /* diff --git a/Source/TransformFunctions/arm_cfft_q31.c b/Source/TransformFunctions/arm_cfft_q31.c index 97ab1294..a26927e3 100644 --- a/Source/TransformFunctions/arm_cfft_q31.c +++ b/Source/TransformFunctions/arm_cfft_q31.c @@ -167,7 +167,7 @@ static void _arm_radix4_butterfly_q31_mve( /* * start of Last stage process */ - uint32x4_t vecScGathAddr = *(uint32x4_t *) strides; + uint32x4_t vecScGathAddr = vld1q_u32(strides); vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; /* @@ -417,7 +417,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve( /* * start of Last stage process */ - uint32x4_t vecScGathAddr = *(uint32x4_t *) strides; + uint32x4_t vecScGathAddr = vld1q_u32(strides); vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; /* diff --git a/Testing/Source/Tests/FastMathQ15.cpp b/Testing/Source/Tests/FastMathQ15.cpp index 2fe13202..20efd1ea 100755 --- a/Testing/Source/Tests/FastMathQ15.cpp +++ b/Testing/Source/Tests/FastMathQ15.cpp @@ -30,6 +30,8 @@ a double precision computation. status = arm_divide_q15(nump[i],denp[i],&outp[i],&shiftp[i]); } + (void)status; + ASSERT_SNR(ref,output,(float32_t)SNR_THRESHOLD); ASSERT_NEAR_EQ(ref,output,ABS_ERROR); ASSERT_EQ(refShift,shift);