CMSIS-DSP: Added mve code for Levinson Durbin f32.

Some correction to Levinson Durbin Q31.
pull/19/head
Christophe Favergeon 5 years ago
parent 630122ae1b
commit dc5477f872

@ -47,6 +47,169 @@
@param[in] nbCoefs number of autoregressive coefficients
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#define LANE23_MASK 0xFF00
void arm_levinson_durbin_f32(const float32_t *phi,
float32_t *a,
float32_t *err,
int nbCoefs)
{
float32_t e;
static const uint32_t revOffsetArray[4] = {3,2,1,0};
a[0] = phi[1] / phi[0];
e = phi[0] - phi[1] * a[0];
for(int p=1; p < nbCoefs; p++)
{
float32_t suma = 0.0f;
float32_t sumb = 0.0f;
f32x4_t vecA,vecRevPhi,vecPhi,vecSumA, vecSumB;
float32_t k;
uint32_t blkCnt;
const float32_t *pPhi,*pRevPhi,*pA;
uint32x4_t revOffset;
int nb,j,i;
revOffset = vld1q(revOffsetArray);
vecSumA = vdupq_n_f32(0.0f);
vecSumB = vdupq_n_f32(0.0f);
pRevPhi = &phi[p-3];
pPhi = &phi[1];
pA = a;
i = 0;
blkCnt = p >> 2;
while(blkCnt > 0)
{
vecA = vld1q(pA);
pA += 4;
vecPhi = vld1q(pPhi);
pPhi += 4;
vecRevPhi = vldrwq_gather_shifted_offset_f32(pRevPhi,revOffset);
pRevPhi -= 4;
vecSumA = vfmaq(vecSumA,vecA,vecRevPhi);
vecSumB = vfmaq(vecSumB,vecA,vecPhi);
i += 4;
blkCnt--;
}
suma = vecAddAcrossF32Mve(vecSumA);
sumb = vecAddAcrossF32Mve(vecSumB);
blkCnt = p & 3;
while(blkCnt > 0)
{
suma += a[i] * phi[p - i];
sumb += a[i] * phi[i + 1];
i++;
blkCnt--;
}
k = (phi[p+1] - suma)/(phi[0] - sumb);
f32x4_t vecRevA,tmp;
static uint32_t orgOffsetArray[4]={0,1,-1,-2};
static const uint32_t offsetIncArray[4]={2,2,-2,-2};
uint32x4_t offset,offsetInc,vecTmp;
offset = vld1q(orgOffsetArray);
vecTmp = vdupq_n_u32(p);
offset = vaddq_m_u32(offset,offset,vecTmp,LANE23_MASK);
offsetInc = vld1q(offsetIncArray);
nb = p >> 2;
j=0;
for(int i = 0; i < nb ; i++)
{
/*
x0=a[j] - k * a[p-1-j];
x1=a[j+1] - k * a[p-2-j];
x3=a[p-1-j] - k * a[j];
x4=a[p-2-j] - k * a[j+1];
a[j] = x0;
a[j+1] = x1;
a[p-1-j] = x2;
a[p-2-j] = x3;
*/
uint64_t tmpa,tmpb;
vecA = vldrwq_gather_shifted_offset_f32(a,offset);
tmpa = vgetq_lane_u64((uint64x2_t)vecA,0);
tmpb = vgetq_lane_u64((uint64x2_t)vecA,1);
vecRevA = (f32x4_t) vsetq_lane_u64(tmpb,(uint64x2_t)vecRevA,0);
vecRevA = (f32x4_t) vsetq_lane_u64(tmpa,(uint64x2_t)vecRevA,1);
tmp = vsubq(vecA,vmulq_n_f32(vecRevA,k));
vstrwq_scatter_shifted_offset_f32(a, offset, tmp);
offset = vaddq(offset,offsetInc);
j+=2;
}
switch(p & 3)
{
case 3:
{
float32_t x,y;
x = a[j] - k * a[p-1-j];
y = a[p-1-j] - k * a[j];
a[j] = x;
a[p-1-j] = y;
a[j+1] = a[j+1] - k * a[p-1-(j+1)];
}
break;
case 2:
{
float32_t x,y;
x = a[j] - k * a[p-1-j];
y = a[p-1-j] - k * a[j];
a[j] = x;
a[p-1-j] = y;
}
break;
case 1:
a[j] = a[j]- k * a[p-1-j];
break;
}
a[p] = k;
e = e * (1 - k*k);
}
*err = e;
}
#else
void arm_levinson_durbin_f32(const float32_t *phi,
float32_t *a,
float32_t *err,
@ -75,7 +238,7 @@ void arm_levinson_durbin_f32(const float32_t *phi,
nb = p >> 1;
j=0;
for(int i =0;i < nb ; i++)
for(int i =0; i < nb ; i++)
{
float32_t x,y;
@ -101,6 +264,7 @@ void arm_levinson_durbin_f32(const float32_t *phi,
}
*err = e;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of LD group

@ -33,31 +33,6 @@
#define HALF_Q15 0x3FFF
#define LOWPART_MASK 0x07FFF
#include <stdio.h>
#define FQ31(X) (1.0*(X) / ONE_Q31)
#define FQ15(X) (1.0*(X) / ONE_Q15)
#define PQ31(X) \
if ((X)>=0) \
{ \
printf("%08X (%f)",(X),FQ31(X)); \
} \
else \
{ \
printf("-%08X (%f)",-(X),FQ31(X));\
}
#define PQ15(X) \
if ((X)>=0) \
{ \
printf("%04X (%f)",(X),FQ15(X)); \
} \
else \
{ \
printf("-%04X (%f)",-(X),FQ15(X));\
}
__STATIC_FORCEINLINE q31_t mul32x16(q31_t a, q15_t b)
{
@ -144,8 +119,6 @@ void arm_levinson_durbin_q31(const q31_t *phi,
//a[0] = phi[1] / phi[0];
a[0] = divide(phi[1], phi[0]);
//printf("%08X %08X\n",phi[1],phi[0]);
//printf("%f / %f = %f\n",FQ31(phi[1]),FQ31(phi[0]),FQ31(a[0]));
//e = phi[0] - phi[1] * a[0];
@ -167,7 +140,6 @@ void arm_levinson_durbin_q31(const q31_t *phi,
suma = suma >> 31;
sumb = sumb >> 31;
//printf("suma = %08X, sumb=%08X\n",(q31_t)(suma & 0x0FFFFFFFF),(q31_t)(sumb & 0x0FFFFFFFF));
//k = (phi[p+1]-suma)/(phi[0] - sumb);

Loading…
Cancel
Save