CMSIS-DSP: Remove type punning in vector code and some compilation warnings.

pull/19/head
Christophe Favergeon 5 years ago
parent 12e56614de
commit ac7da660b7

@@ -69,10 +69,9 @@
float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize)
{
uint32_t blkCnt;
-f16x8_t a, b, tmpV, accumV, sumV;
+f16x8_t a, b, tmpV, sumV;
sumV = vdupq_n_f16(0.0f);
-accumV = vdupq_n_f16(0.0f);
blkCnt = blockSize >> 3;
while (blkCnt > 0U) {

@@ -76,10 +76,9 @@ __attribute__((weak)) float __powisf2(float a, int b)
float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize)
{
uint32_t blkCnt;
-f32x4_t a, b, tmpV, accumV, sumV;
+f32x4_t a, b, tmpV, sumV;
sumV = vdupq_n_f32(0.0f);
-accumV = vdupq_n_f32(0.0f);
blkCnt = blockSize >> 2;
while (blkCnt > 0U) {
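The two hunks above drop accumV, a vector that was initialised but never read, which is the kind of dead store that -Wall builds report as an unused (or set-but-unused) variable. A minimal sketch of the pattern, using plain NEON intrinsics as a stand-in for the Helium build and an illustrative function name:

#include <arm_neon.h>   /* stand-in for the Helium/MVE build used by the library */

/* Illustrative only, not CMSIS-DSP code: sum four floats with a vector. */
static float demo_sum4(const float *p)
{
    float32x4_t sumV = vdupq_n_f32(0.0f);
    /* float32x4_t accumV = vdupq_n_f32(0.0f);  <- written but never read,
       the same dead value the commit removes from arm_minkowski_distance_* */
    sumV = vaddq_f32(sumV, vld1q_f32(p));
    return vgetq_lane_f32(sumV, 0) + vgetq_lane_f32(sumV, 1)
         + vgetq_lane_f32(sumV, 2) + vgetq_lane_f32(sumV, 3);
}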

@@ -147,7 +147,7 @@ static void arm_fir_interpolate2_f32_mve(
uint32_t blkCnt; /* Loop counters */
uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
uint32_t strides[4] = { 0, 1 * 2, 2 * 2, 3 * 2 };
-uint32x4_t vec_strides0 = *(uint32x4_t *) strides;
+uint32x4_t vec_strides0 = vld1q_u32(strides);
uint32x4_t vec_strides1 = vec_strides0 + 1;
f32x4_t acc0, acc1;
@@ -271,8 +271,8 @@ void arm_fir_interpolate_f32(
uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
uint32_t strides[4] = { 0, 1 * S->L, 2 * S->L, 3 * S->L };
uint32_t stridesM[4] = { 4, 3, 2, 1 };
-uint32x4_t vec_stridesM = *(uint32x4_t *) stridesM;
-uint32x4_t vec_strides = *(uint32x4_t *) strides;
+uint32x4_t vec_stridesM = vld1q_u32(stridesM);
+uint32x4_t vec_strides = vld1q_u32(strides);
f32x4_t acc;
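These interpolator hunks carry the main change of the commit: *(uint32x4_t *) strides reads a uint32_t array through a vector pointer, which type-puns the object, breaks the strict-aliasing rules and may assume stricter alignment than the array guarantees, whereas vld1q_u32 is the intrinsic defined to load four lanes from a uint32_t pointer. A minimal before/after sketch (NEON header used for illustration only; the library builds these files for Helium/MVE, where vld1q_u32 also exists):

#include <stdint.h>
#include <arm_neon.h>   /* illustration only; not the actual CMSIS-DSP build setup */

/* Old pattern: type punning through a pointer cast (undefined behaviour risk). */
static uint32x4_t load_strides_punned(const uint32_t strides[4])
{
    return *(const uint32x4_t *) strides;
}

/* New pattern: a well-defined vector load from a uint32_t pointer. */
static uint32x4_t load_strides(const uint32_t strides[4])
{
    return vld1q_u32(strides);
}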

@@ -71,7 +71,7 @@ void arm_fir_interpolate_q31(
uint32_t i, blkCnt; /* Loop counters */
uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
uint32_t strides[4] = { 0, 1 * S->L, 2 * S->L, 3 * S->L };
-uint32x4_t vec_strides0 = *(uint32x4_t *) strides;
+uint32x4_t vec_strides0 = vld1q_u32(strides);
uint32x4_t vec_strides1 = vec_strides0 + 1;
uint32x4_t vec_strides2 = vec_strides0 + 2;
uint32x4_t vec_strides3 = vec_strides0 + 3;

@@ -75,6 +75,7 @@ __STATIC_FORCEINLINE q31_t divide(q31_t n, q31_t d)
// Result is in Q14 because of use of HALF_Q15 instead of ONE_Q15.
status=arm_divide_q15(HALF_Q15,d>>16,&inverse,&shift);
+(void)status;
// d is used instead of l
// So we will need to substract to 2 instead of 1.
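The (void)status; added above is the usual way to tell the compiler that a return value is deliberately ignored, so warning-enabled builds stay quiet about a set-but-unused variable without changing behaviour. A small self-contained sketch of the idiom (illustrative names only, not CMSIS-DSP code):

#include <stdio.h>

static int do_work(void) { return 0; }   /* stands in for a status-returning call */

int main(void)
{
    int status = do_work();
    (void)status;        /* documents that status is intentionally not checked here */
    puts("done");
    return 0;
}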

@@ -532,7 +532,7 @@ arm_status arm_mat_cmplx_mult_f32(
uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */
uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */
uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */
-uint16_t col, i = 0U, row = numRowsA, colCnt; /* loop counters */
+uint16_t col, i = 0U, row = numRowsA; /* loop counters */
arm_status status; /* status of matrix multiplication */
uint32x4_t vecOffs, vecColBOffs;
uint32_t blkCnt, rowCnt; /* loop counters */
@@ -611,7 +611,6 @@ arm_status arm_mat_cmplx_mult_f32(
/*
* Matrix A columns number of MAC operations are to be performed
*/
-colCnt = numColsA;
float32_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec;
float32_t const *pInA0 = pInA;
@@ -752,7 +751,6 @@ arm_status arm_mat_cmplx_mult_f32(
/*
* Matrix A columns number of MAC operations are to be performed
*/
-colCnt = numColsA;
float32_t const *pSrcA0Vec;
float32_t const *pInA0 = pInA;

@@ -526,7 +526,7 @@ arm_status arm_mat_cmplx_mult_q31(
uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */
uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */
uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */
-uint16_t col, i = 0U, row = numRowsA, colCnt; /* loop counters */
+uint16_t col, i = 0U, row = numRowsA; /* loop counters */
arm_status status; /* status of matrix multiplication */
uint32x4_t vecOffs, vecColBOffs;
uint32_t blkCnt, rowCnt; /* loop counters */
@@ -611,7 +611,6 @@ arm_status arm_mat_cmplx_mult_q31(
/*
* Matrix A columns number of MAC operations are to be performed
*/
-colCnt = numColsA;
q31_t const *pSrcA0Vec, *pSrcA1Vec;
q31_t const *pInA0 = pInA;
@@ -742,7 +741,6 @@ arm_status arm_mat_cmplx_mult_q31(
/*
* Matrix A columns number of MAC operations are to be performed
*/
-colCnt = numColsA;
q31_t const *pSrcA0Vec;
q31_t const *pInA0 = pInA;

@@ -328,7 +328,7 @@ arm_status arm_mat_mult_q15(
uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */
uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */
uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */
-uint16_t col, i = 0U, row = numRowsA, colCnt; /* loop counters */
+uint16_t col, i = 0U, row = numRowsA; /* loop counters */
uint16x8_t vecOffs, vecColBOffs;
uint32_t blkCnt,rowCnt; /* loop counters */
arm_status status; /* Status of matrix multiplication */
@@ -403,7 +403,6 @@ arm_status arm_mat_mult_q15(
/*
* Matrix A columns number of MAC operations are to be performed
*/
-colCnt = numColsA;
q15_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec;
q15_t *pInA0 = pInA;
@@ -519,7 +518,6 @@ arm_status arm_mat_mult_q15(
/*
* Matrix A columns number of MAC operations are to be performed
*/
-colCnt = numColsA;
q15_t const *pSrcA0Vec;
q15_t *pInA0 = pInA;

@@ -344,7 +344,7 @@ arm_status arm_mat_mult_q31(
uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */
uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */
uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */
-uint16_t col, i = 0U, row = numRowsA, colCnt; /* loop counters */
+uint16_t col, i = 0U, row = numRowsA; /* loop counters */
arm_status status; /* status of matrix multiplication */
uint32x4_t vecOffs, vecColBOffs;
uint32_t blkCnt, rowCnt; /* loop counters */
@@ -418,7 +418,6 @@ arm_status arm_mat_mult_q31(
/*
* Matrix A columns number of MAC operations are to be performed
*/
-colCnt = numColsA;
q31_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec;
q31_t const *pInA0 = pInA;
@@ -541,7 +540,6 @@ arm_status arm_mat_mult_q31(
/*
* Matrix A columns number of MAC operations are to be performed
*/
-colCnt = numColsA;
q31_t const *pSrcA0Vec;
q31_t const *pInA0 = pInA;

@@ -208,7 +208,7 @@ static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float1
/*
* start of Last stage process
*/
-uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+uint32x4_t vecScGathAddr = vld1q_u32(strides);
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/* load scheduling */
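This hunk and the following CFFT hunks apply the same fix inside the last-stage kernels: the stride table that seeds the gather addresses is now loaded with vld1q_u32 instead of being type-punned, and the buffer base is then added lane-wise. A sketch of that address construction (NEON intrinsics for illustration only, 32-bit pointers assumed as on Cortex-M; the real kernels feed the result to Helium gather loads):

#include <stdint.h>
#include <arm_neon.h>   /* illustration only; the kernels above use Helium/MVE intrinsics */

/* Build a vector of byte addresses: strides loaded safely, then offset by
 * the buffer base. On 32-bit Cortex-M targets a pointer fits in a lane. */
static uint32x4_t gather_addresses(const float *pSrc, const uint32_t strides[4])
{
    uint32x4_t addr = vld1q_u32(strides);   /* was *(uint32x4_t *) strides */
    return vaddq_u32(addr, vdupq_n_u32((uint32_t)(uintptr_t) pSrc));
}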
@@ -414,7 +414,7 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 *
/*
* start of Last stage process
*/
-uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+uint32x4_t vecScGathAddr = vld1q_u32(strides);
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*

@@ -210,7 +210,7 @@ static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float3
/*
* start of Last stage process
*/
-uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+uint32x4_t vecScGathAddr = vld1q_u32(strides);
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/* load scheduling */
@@ -416,7 +416,7 @@ static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 *
/*
* start of Last stage process
*/
-uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+uint32x4_t vecScGathAddr = vld1q_u32 (strides);
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*

@@ -157,7 +157,7 @@ static void _arm_radix4_butterfly_q15_mve(
/*
* start of Last stage process
*/
-uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+uint32x4_t vecScGathAddr = vld1q_u32 (strides);
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*
@@ -392,7 +392,7 @@ static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S
/*
* start of Last stage process
*/
-uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+uint32x4_t vecScGathAddr = vld1q_u32(strides);
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*

@@ -167,7 +167,7 @@ static void _arm_radix4_butterfly_q31_mve(
/*
* start of Last stage process
*/
-uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+uint32x4_t vecScGathAddr = vld1q_u32(strides);
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*
@@ -417,7 +417,7 @@ static void _arm_radix4_butterfly_inverse_q31_mve(
/*
* start of Last stage process
*/
-uint32x4_t vecScGathAddr = *(uint32x4_t *) strides;
+uint32x4_t vecScGathAddr = vld1q_u32(strides);
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/*

@@ -30,6 +30,8 @@ a double precision computation.
status = arm_divide_q15(nump[i],denp[i],&outp[i],&shiftp[i]);
}
+(void)status;
ASSERT_SNR(ref,output,(float32_t)SNR_THRESHOLD);
ASSERT_NEAR_EQ(ref,output,ABS_ERROR);
ASSERT_EQ(refShift,shift);
