CMSIS-DSP: Cleanup of the MVE code for FIR f32

pull/19/head
Christophe Favergeon 5 years ago
parent 6f229b4f65
commit 364fdb9a28

@@ -449,9 +449,7 @@ uint32_t blockSize)
vecAcc0 = vfmaq(vecAcc0, vecIn0, c6); vecAcc0 = vfmaq(vecAcc0, vecIn0, c6);
vecIn0 = vld1q(&pSamples[7]); vecIn0 = vld1q(&pSamples[7]);
vecAcc0 = vfmaq(vecAcc0, vecIn0, c7); vecAcc0 = vfmaq(vecAcc0, vecIn0, c7);
pSamples += cnt;
vstrwq_p_f32(partial_accu_ptr, vecAcc0,p0); vstrwq_p_f32(partial_accu_ptr, vecAcc0,p0);
partial_accu_ptr += cnt;
} }
int localTaps = numTaps - FIR_F32_MAX_COEF_BLK; int localTaps = numTaps - FIR_F32_MAX_COEF_BLK;
@@ -519,10 +517,8 @@ uint32_t blockSize)
vecAcc0 = vfmaq(vecAcc0, vecIn0, c6); vecAcc0 = vfmaq(vecAcc0, vecIn0, c6);
vecIn0 = vld1q(&pSamples[7]); vecIn0 = vld1q(&pSamples[7]);
vecAcc0 = vfmaq(vecAcc0, vecIn0, c7); vecAcc0 = vfmaq(vecAcc0, vecIn0, c7);
pSamples += cnt;
vecAcc0 += vld1q_f32(partial_accu_ptr); vecAcc0 += vld1q_f32(partial_accu_ptr);
vstrwq_p_f32(partial_accu_ptr, vecAcc0,p0); vstrwq_p_f32(partial_accu_ptr, vecAcc0,p0);
partial_accu_ptr += cnt;
} }
localTaps -= FIR_F32_MAX_COEF_BLK; localTaps -= FIR_F32_MAX_COEF_BLK;
@@ -595,10 +591,8 @@ uint32_t blockSize)
vecAcc0 = vfmaq(vecAcc0, vecIn0, c6); vecAcc0 = vfmaq(vecAcc0, vecIn0, c6);
vecIn0 = vld1q(&pSamples[7]); vecIn0 = vld1q(&pSamples[7]);
vecAcc0 = vfmaq(vecAcc0, vecIn0, c7); vecAcc0 = vfmaq(vecAcc0, vecIn0, c7);
pSamples += cnt;
float32x4_t pap = vld1q_f32(partial_accu_ptr); float32x4_t pap = vld1q_f32(partial_accu_ptr);
vstrwq_p_f32(pOutput, vecAcc0+pap,p0); vstrwq_p_f32(pOutput, vecAcc0+pap,p0);
partial_accu_ptr += cnt;
pOutput += cnt; pOutput += cnt;
} }
} }
@@ -646,10 +640,8 @@ uint32_t blockSize)
vecAcc0 = vfmaq(vecAcc0, vecIn0, c2); vecAcc0 = vfmaq(vecAcc0, vecIn0, c2);
vecIn0 = vld1q(&pSamples[3]); vecIn0 = vld1q(&pSamples[3]);
vecAcc0 = vfmaq(vecAcc0, vecIn0, c3); vecAcc0 = vfmaq(vecAcc0, vecIn0, c3);
pSamples += cnt;
float32x4_t pap = vld1q_f32(partial_accu_ptr); float32x4_t pap = vld1q_f32(partial_accu_ptr);
vstrwq_p_f32(pOutput, vecAcc0+pap,p0); vstrwq_p_f32(pOutput, vecAcc0+pap,p0);
partial_accu_ptr += cnt;
pOutput += cnt; pOutput += cnt;
} }
} }

Loading…
Cancel
Save