|
|
|
@ -33,7 +33,6 @@
|
|
|
|
#define HALF_Q15 0x3FFF
|
|
|
|
#define HALF_Q15 0x3FFF
|
|
|
|
#define LOWPART_MASK 0x07FFF
|
|
|
|
#define LOWPART_MASK 0x07FFF
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__STATIC_FORCEINLINE q31_t mul32x16(q31_t a, q15_t b)
|
|
|
|
__STATIC_FORCEINLINE q31_t mul32x16(q31_t a, q15_t b)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
q31_t r = ((q63_t)a * (q63_t)b) >> 15;
|
|
|
|
q31_t r = ((q63_t)a * (q63_t)b) >> 15;
|
|
|
|
@ -86,7 +85,6 @@ __STATIC_FORCEINLINE q31_t divide(q31_t n, q31_t d)
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
@ingroup groupFilters
|
|
|
|
@ingroup groupFilters
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
@ -109,6 +107,190 @@ __STATIC_FORCEINLINE q31_t divide(q31_t n, q31_t d)
|
|
|
|
@param[in] nbCoefs number of autoregressive coefficients
|
|
|
|
@param[in] nbCoefs number of autoregressive coefficients
|
|
|
|
@return none
|
|
|
|
@return none
|
|
|
|
*/
|
|
|
|
*/
|
|
|
|
|
|
|
|
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define LANE23_MASK 0xFF00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "arm_helium_utils.h"
|
|
|
|
|
|
|
|
void arm_levinson_durbin_q31(const q31_t *phi,
|
|
|
|
|
|
|
|
q31_t *a,
|
|
|
|
|
|
|
|
q31_t *err,
|
|
|
|
|
|
|
|
int nbCoefs)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
q31_t e;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static const uint32_t revOffsetArray[4] = {3,2,1,0};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//a[0] = phi[1] / phi[0];
|
|
|
|
|
|
|
|
a[0] = divide(phi[1], phi[0]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//e = phi[0] - phi[1] * a[0];
|
|
|
|
|
|
|
|
e = phi[0] - mul32x32(phi[1],a[0]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for(int p=1; p < nbCoefs; p++)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
q63_t suma=0;
|
|
|
|
|
|
|
|
q63_t sumb=0;
|
|
|
|
|
|
|
|
q31x4_t vecA,vecRevPhi,vecPhi;
|
|
|
|
|
|
|
|
q31_t k;
|
|
|
|
|
|
|
|
uint32_t blkCnt;
|
|
|
|
|
|
|
|
const q31_t *pPhi,*pRevPhi,*pA;
|
|
|
|
|
|
|
|
uint32x4_t revOffset;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int nb,j,i;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
revOffset = vld1q(revOffsetArray);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pRevPhi = &phi[p-3];
|
|
|
|
|
|
|
|
pPhi = &phi[1];
|
|
|
|
|
|
|
|
pA = a;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
i = 0;
|
|
|
|
|
|
|
|
blkCnt = p >> 2;
|
|
|
|
|
|
|
|
while(blkCnt > 0)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
vecA = vld1q(pA);
|
|
|
|
|
|
|
|
pA += 4;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vecPhi = vld1q(pPhi);
|
|
|
|
|
|
|
|
pPhi += 4;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vecRevPhi = vldrwq_gather_shifted_offset_s32(pRevPhi,revOffset);
|
|
|
|
|
|
|
|
pRevPhi -= 4;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
suma = vmlaldavaq(suma,vecA,vecRevPhi);
|
|
|
|
|
|
|
|
sumb = vmlaldavaq(sumb,vecA,vecPhi);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
i += 4;
|
|
|
|
|
|
|
|
blkCnt--;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
blkCnt = p & 3;
|
|
|
|
|
|
|
|
while(blkCnt > 0)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
suma += ((q63_t)a[i] * phi[p - i]);
|
|
|
|
|
|
|
|
sumb += ((q63_t)a[i] * phi[i + 1]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
i++;
|
|
|
|
|
|
|
|
blkCnt--;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
suma = asrl(suma, 31);
|
|
|
|
|
|
|
|
sumb = asrl(sumb, 31);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//k = (phi[p+1]-suma)/(phi[0] - sumb);
|
|
|
|
|
|
|
|
k = divide(phi[p+1]-(q31_t)suma,phi[0] - (q31_t)sumb);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
q31x4_t vecRevA,tmp;
|
|
|
|
|
|
|
|
static uint32_t orgOffsetArray[4]={0,1,-1,-2};
|
|
|
|
|
|
|
|
static const uint32_t offsetIncArray[4]={2,2,-2,-2};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uint32x4_t offset,offsetInc,vecTmp;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
offset = vld1q(orgOffsetArray);
|
|
|
|
|
|
|
|
vecTmp = vdupq_n_u32(p);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
offset = vaddq_m_u32(offset,offset,vecTmp,LANE23_MASK);
|
|
|
|
|
|
|
|
offsetInc = vld1q(offsetIncArray);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
nb = p >> 2;
|
|
|
|
|
|
|
|
j=0;
|
|
|
|
|
|
|
|
for(int i =0;i < nb ; i++)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
q31_t x0,x1,x2,x3;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//x = a[j] - k * a[p-1-j];
|
|
|
|
|
|
|
|
x0 = a[j] - mul32x32(k,a[p-1-j]);
|
|
|
|
|
|
|
|
x1 = a[j+1] - mul32x32(k,a[p-2-j]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//y = a[p-1-j] - k * a[j];
|
|
|
|
|
|
|
|
x2 = a[p-1-j] - mul32x32(k , a[j]);
|
|
|
|
|
|
|
|
x3 = a[p-2-j] - mul32x32(k , a[j+1]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
a[j] = x0;
|
|
|
|
|
|
|
|
a[j+1] = x1;
|
|
|
|
|
|
|
|
a[p-1-j] = x2;
|
|
|
|
|
|
|
|
a[p-2-j] = x3;
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uint64_t tmpa,tmpb;
|
|
|
|
|
|
|
|
vecA = vldrwq_gather_shifted_offset_s32(a,offset);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tmpa = vgetq_lane_u64((uint64x2_t)vecA,0);
|
|
|
|
|
|
|
|
tmpb = vgetq_lane_u64((uint64x2_t)vecA,1);
|
|
|
|
|
|
|
|
vecRevA = (q31x4_t) vsetq_lane_u64(tmpb,(uint64x2_t)vecRevA,0);
|
|
|
|
|
|
|
|
vecRevA = (q31x4_t) vsetq_lane_u64(tmpa,(uint64x2_t)vecRevA,1);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tmp = vsubq(vecA,vqdmulhq_n_s32(vecRevA,k));
|
|
|
|
|
|
|
|
vstrwq_scatter_shifted_offset_s32(a, offset, tmp);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
offset = vaddq(offset,offsetInc);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
j+=2;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
switch(p & 3)
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
case 3:
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
q31_t x,y;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//x = a[j] - k * a[p-1-j];
|
|
|
|
|
|
|
|
x = a[j] - mul32x32(k,a[p-1-j]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//y = a[p-1-j] - k * a[j];
|
|
|
|
|
|
|
|
y = a[p-1-j] - mul32x32(k , a[j]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
a[j] = x;
|
|
|
|
|
|
|
|
a[p-1-j] = y;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//a[j] = a[j]- k * a[p-1-j];
|
|
|
|
|
|
|
|
a[j+1] = a[j+1] - mul32x32(k,a[p-2-j]);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
q31_t x,y;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//x = a[j] - k * a[p-1-j];
|
|
|
|
|
|
|
|
x = a[j] - mul32x32(k,a[p-1-j]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//y = a[p-1-j] - k * a[j];
|
|
|
|
|
|
|
|
y = a[p-1-j] - mul32x32(k , a[j]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
a[j] = x;
|
|
|
|
|
|
|
|
a[p-1-j] = y;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
|
|
|
|
//a[j] = a[j]- k * a[p-1-j];
|
|
|
|
|
|
|
|
a[j] = a[j] - mul32x32(k,a[p-1-j]);
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
a[p] = k;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// e = e * (1 - k*k);
|
|
|
|
|
|
|
|
e = mul32x32(e,ONE_Q31 - mul32x32(k,k));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
*err = e;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
|
|
void arm_levinson_durbin_q31(const q31_t *phi,
|
|
|
|
void arm_levinson_durbin_q31(const q31_t *phi,
|
|
|
|
q31_t *a,
|
|
|
|
q31_t *a,
|
|
|
|
@ -180,6 +362,7 @@ void arm_levinson_durbin_q31(const q31_t *phi,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*err = e;
|
|
|
|
*err = e;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* defined(ARM_MATH_MVEI) */
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
/**
|
|
|
|
@} end of LD group
|
|
|
|
@} end of LD group
|
|
|
|
|