diff --git a/Source/FilteringFunctions/arm_levinson_durbin_q31.c b/Source/FilteringFunctions/arm_levinson_durbin_q31.c index 3bececff..d6e92209 100755 --- a/Source/FilteringFunctions/arm_levinson_durbin_q31.c +++ b/Source/FilteringFunctions/arm_levinson_durbin_q31.c @@ -33,7 +33,6 @@ #define HALF_Q15 0x3FFF #define LOWPART_MASK 0x07FFF - __STATIC_FORCEINLINE q31_t mul32x16(q31_t a, q15_t b) { q31_t r = ((q63_t)a * (q63_t)b) >> 15; @@ -86,7 +85,6 @@ __STATIC_FORCEINLINE q31_t divide(q31_t n, q31_t d) } - /** @ingroup groupFilters */ @@ -109,6 +107,190 @@ __STATIC_FORCEINLINE q31_t divide(q31_t n, q31_t d) @param[in] nbCoefs number of autoregressive coefficients @return none */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#define LANE23_MASK 0xFF00 + +#include "arm_helium_utils.h" +void arm_levinson_durbin_q31(const q31_t *phi, + q31_t *a, + q31_t *err, + int nbCoefs) +{ + q31_t e; + + static const uint32_t revOffsetArray[4] = {3,2,1,0}; + + //a[0] = phi[1] / phi[0]; + a[0] = divide(phi[1], phi[0]); + + + //e = phi[0] - phi[1] * a[0]; + e = phi[0] - mul32x32(phi[1],a[0]); + + for(int p=1; p < nbCoefs; p++) + { + q63_t suma=0; + q63_t sumb=0; + q31x4_t vecA,vecRevPhi,vecPhi; + q31_t k; + uint32_t blkCnt; + const q31_t *pPhi,*pRevPhi,*pA; + uint32x4_t revOffset; + + + int nb,j,i; + + revOffset = vld1q(revOffsetArray); + + pRevPhi = &phi[p-3]; + pPhi = &phi[1]; + pA = a; + + i = 0; + blkCnt = p >> 2; + while(blkCnt > 0) + { + vecA = vld1q(pA); + pA += 4; + + vecPhi = vld1q(pPhi); + pPhi += 4; + + vecRevPhi = vldrwq_gather_shifted_offset_s32(pRevPhi,revOffset); + pRevPhi -= 4; + + suma = vmlaldavaq(suma,vecA,vecRevPhi); + sumb = vmlaldavaq(sumb,vecA,vecPhi); + + i += 4; + blkCnt--; + } + + + blkCnt = p & 3; + while(blkCnt > 0) + { + suma += ((q63_t)a[i] * phi[p - i]); + sumb += ((q63_t)a[i] * phi[i + 1]); + + i++; + blkCnt--; + } + + suma = asrl(suma, 31); + sumb = asrl(sumb, 31); + + + + //k = (phi[p+1]-suma)/(phi[0] - sumb); + k = divide(phi[p+1]-(q31_t)suma,phi[0] - (q31_t)sumb); + + q31x4_t vecRevA,tmp; + static uint32_t orgOffsetArray[4]={0,1,-1,-2}; + static const uint32_t offsetIncArray[4]={2,2,-2,-2}; + + uint32x4_t offset,offsetInc,vecTmp; + + + offset = vld1q(orgOffsetArray); + vecTmp = vdupq_n_u32(p); + + offset = vaddq_m_u32(offset,offset,vecTmp,LANE23_MASK); + offsetInc = vld1q(offsetIncArray); + + + nb = p >> 2; + j=0; + for(int i =0;i < nb ; i++) + { + /* + q31_t x0,x1,x2,x3; + + //x = a[j] - k * a[p-1-j]; + x0 = a[j] - mul32x32(k,a[p-1-j]); + x1 = a[j+1] - mul32x32(k,a[p-2-j]); + + //y = a[p-1-j] - k * a[j]; + x2 = a[p-1-j] - mul32x32(k , a[j]); + x3 = a[p-2-j] - mul32x32(k , a[j+1]); + + a[j] = x0; + a[j+1] = x1; + a[p-1-j] = x2; + a[p-2-j] = x3; + */ + + uint64_t tmpa,tmpb; + vecA = vldrwq_gather_shifted_offset_s32(a,offset); + + + tmpa = vgetq_lane_u64((uint64x2_t)vecA,0); + tmpb = vgetq_lane_u64((uint64x2_t)vecA,1); + vecRevA = (q31x4_t) vsetq_lane_u64(tmpb,(uint64x2_t)vecRevA,0); + vecRevA = (q31x4_t) vsetq_lane_u64(tmpa,(uint64x2_t)vecRevA,1); + + + tmp = vsubq(vecA,vqdmulhq_n_s32(vecRevA,k)); + vstrwq_scatter_shifted_offset_s32(a, offset, tmp); + + offset = vaddq(offset,offsetInc); + + j+=2; + } + + switch(p & 3) + { + case 3: + { + q31_t x,y; + + //x = a[j] - k * a[p-1-j]; + x = a[j] - mul32x32(k,a[p-1-j]); + + //y = a[p-1-j] - k * a[j]; + y = a[p-1-j] - mul32x32(k , a[j]); + + a[j] = x; + a[p-1-j] = y; + + //a[j] = a[j]- k * a[p-1-j]; + a[j+1] = a[j+1] - mul32x32(k,a[p-2-j]); + } + break; + + case 2: + { + q31_t x,y; + + //x = a[j] - k * a[p-1-j]; + x = a[j] - mul32x32(k,a[p-1-j]); + + //y = a[p-1-j] - k * a[j]; + y = a[p-1-j] - mul32x32(k , a[j]); + + a[j] = x; + a[p-1-j] = y; + } + break; + + case 1: + //a[j] = a[j]- k * a[p-1-j]; + a[j] = a[j] - mul32x32(k,a[p-1-j]); + break; + } + + a[p] = k; + + // e = e * (1 - k*k); + e = mul32x32(e,ONE_Q31 - mul32x32(k,k)); + + + } + *err = e; +} + +#else void arm_levinson_durbin_q31(const q31_t *phi, q31_t *a, @@ -180,6 +362,7 @@ void arm_levinson_durbin_q31(const q31_t *phi, } *err = e; } +#endif /* defined(ARM_MATH_MVEI) */ /** @} end of LD group