Fixed compiler warnings

Fixed shadowed variables in the matrix utility macros and in the assembly macros used by the Cortex-M convolution code
Fixed type promotions in _f64 matrix and transform code
pull/95/head
Timothy Fosdike 3 years ago
parent e8d1f3a10c
commit aa2ed3777c

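For context on the shadowing fixes, here is a minimal sketch (not taken from CMSIS-DSP; names are hypothetical) of the warning the renames in the hunks below silence: a helper macro that declares its own loop counter `w` shadows any `w` already in scope at the expansion site and trips `-Wshadow`, while underscore-prefixed macro-locals (`_w`, `_numCols`) stay out of the caller's namespace.

/* Hypothetical illustration only, e.g. compiled with: gcc -Wshadow -c shadow_demo.c */
#include <stdint.h>
#include <stdio.h>

/* Declares a plain 'w': shadows a caller-side 'w' and triggers -Wshadow. */
#define SCALE_ROW_BAD(data, n, v)                             \
{                                                             \
    int32_t w;                                                \
    for (w = 0; w < (n); w++) { (data)[w] *= (v); }           \
}

/* Same body with an underscore-prefixed local, as done in matrix_utils.h below. */
#define SCALE_ROW_OK(data, n, v)                              \
{                                                             \
    int32_t _w;                                               \
    for (_w = 0; _w < (n); _w++) { (data)[_w] *= (v); }       \
}

int main(void)
{
    float row[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    int32_t w = 3;                /* caller already has a 'w' in scope */

    SCALE_ROW_BAD(row, 4, 2.0f);  /* warning: declaration of 'w' shadows a previous local */
    SCALE_ROW_OK(row, 4, 0.5f);   /* no warning: macro-local '_w' cannot collide */

    printf("%f %d\n", (double)row[0], (int)w);
    return 0;
}

The convolution hunks appear to apply the same idea: the saturated values are first computed into plain locals (sat0 .. sat3) and only then passed to the packing and store macros, so nested macro expansions no longer introduce colliding temporaries.
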
@ -42,29 +42,29 @@ extern "C"
#define SCALE_COL_T(T,CAST,A,ROW,v,i) \
{ \
int32_t w; \
int32_t _w; \
T *data = (A)->pData; \
const int32_t numCols = (A)->numCols; \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = (A)->numRows - ROW;\
\
data += i + numCols * (ROW); \
data += i + _numCols * (ROW); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
*data *= CAST v; \
data += numCols; \
data += _numCols; \
} \
}
#define COPY_COL_T(T,A,ROW,COL,DST) \
{ \
uint32_t row; \
T *pb=DST; \
T *pa = (A)->pData + ROW * (A)->numCols + COL;\
for(row = ROW; row < (A)->numRows; row ++) \
uint32_t _row; \
T *_pb=DST; \
T *_pa = (A)->pData + ROW * (A)->numCols + COL;\
for(_row = ROW; _row < (A)->numRows; _row ++) \
{ \
*pb++ = *pa; \
pa += (A)->numCols; \
*_pb++ = *_pa; \
_pa += (A)->numCols; \
} \
}
@ -74,20 +74,20 @@ extern "C"
#define SWAP_ROWS_F16(A,COL,i,j) \
{ \
int cnt = ((A)->numCols)-(COL); \
int32_t w; \
int32_t _w; \
float16_t *data = (A)->pData; \
const int32_t numCols = (A)->numCols; \
const int32_t _numCols = (A)->numCols; \
\
for(w=(COL);w < numCols; w+=8) \
for(_w=(COL);_w < _numCols; _w+=8) \
{ \
f16x8_t tmpa,tmpb; \
mve_pred16_t p0 = vctp16q(cnt); \
\
tmpa=vldrhq_z_f16(&data[i*numCols + w],p0);\
tmpb=vldrhq_z_f16(&data[j*numCols + w],p0);\
tmpa=vldrhq_z_f16(&data[i*_numCols + _w],p0);\
tmpb=vldrhq_z_f16(&data[j*_numCols + _w],p0);\
\
vstrhq_p(&data[i*numCols + w], tmpb, p0); \
vstrhq_p(&data[j*numCols + w], tmpa, p0); \
vstrhq_p(&data[i*_numCols + _w], tmpb, p0); \
vstrhq_p(&data[j*_numCols + _w], tmpa, p0); \
\
cnt -= 8; \
} \
@ -96,17 +96,17 @@ extern "C"
#define SCALE_ROW_F16(A,COL,v,i) \
{ \
int cnt = ((A)->numCols)-(COL); \
int32_t w; \
int32_t _w; \
float16_t *data = (A)->pData; \
const int32_t numCols = (A)->numCols; \
const int32_t _numCols = (A)->numCols; \
\
for(w=(COL);w < numCols; w+=8) \
for(_w=(COL);_w < _numCols; _w+=8) \
{ \
f16x8_t tmpa; \
mve_pred16_t p0 = vctp16q(cnt); \
tmpa = vldrhq_z_f16(&data[i*numCols + w],p0);\
tmpa = vldrhq_z_f16(&data[i*_numCols + _w],p0);\
tmpa = vmulq_n_f16(tmpa,(_Float16)v); \
vstrhq_p(&data[i*numCols + w], tmpa, p0); \
vstrhq_p(&data[i*_numCols + _w], tmpa, p0); \
cnt -= 8; \
} \
\
@ -115,19 +115,19 @@ extern "C"
#define MAC_ROW_F16(COL,A,i,v,B,j) \
{ \
int cnt = ((A)->numCols)-(COL); \
int32_t w; \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols; \
const int32_t _numCols = (A)->numCols; \
\
for(w=(COL);w < numCols; w+=8) \
for(_w=(COL);_w < _numCols; _w+=8) \
{ \
f16x8_t tmpa,tmpb; \
mve_pred16_t p0 = vctp16q(cnt); \
tmpa = vldrhq_z_f16(&dataA[i*numCols + w],p0);\
tmpb = vldrhq_z_f16(&dataB[j*numCols + w],p0);\
tmpa = vldrhq_z_f16(&dataA[i*_numCols + _w],p0);\
tmpb = vldrhq_z_f16(&dataB[j*_numCols + _w],p0);\
tmpa = vfmaq_n_f16(tmpa,tmpb,v); \
vstrhq_p(&dataA[i*numCols + w], tmpa, p0); \
vstrhq_p(&dataA[i*_numCols + _w], tmpa, p0); \
cnt -= 8; \
} \
\
@ -136,20 +136,20 @@ extern "C"
#define MAS_ROW_F16(COL,A,i,v,B,j) \
{ \
int cnt = ((A)->numCols)-(COL); \
int32_t w; \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols; \
const int32_t _numCols = (A)->numCols; \
f16x8_t vec=vdupq_n_f16(v); \
\
for(w=(COL);w < numCols; w+=8) \
for(_w=(COL);_w < _numCols; _w+=8) \
{ \
f16x8_t tmpa,tmpb; \
mve_pred16_t p0 = vctp16q(cnt); \
tmpa = vldrhq_z_f16(&dataA[i*numCols + w],p0);\
tmpb = vldrhq_z_f16(&dataB[j*numCols + w],p0);\
tmpa = vldrhq_z_f16(&dataA[i*_numCols + _w],p0);\
tmpb = vldrhq_z_f16(&dataB[j*_numCols + _w],p0);\
tmpa = vfmsq_f16(tmpa,tmpb,vec); \
vstrhq_p(&dataA[i*numCols + w], tmpa, p0); \
vstrhq_p(&dataA[i*_numCols + _w], tmpa, p0); \
cnt -= 8; \
} \
\
@ -160,16 +160,16 @@ extern "C"
#define SWAP_ROWS_F16(A,COL,i,j) \
{ \
int32_t w; \
int32_t _w; \
float16_t *dataI = (A)->pData; \
float16_t *dataJ = (A)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols-(COL); \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
dataI += i*numCols + (COL); \
dataJ += j*numCols + (COL); \
dataI += i*_numCols + (COL); \
dataJ += j*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
float16_t tmp; \
tmp = *dataI; \
@ -180,14 +180,14 @@ extern "C"
#define SCALE_ROW_F16(A,COL,v,i) \
{ \
int32_t w; \
int32_t _w; \
float16_t *data = (A)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols-(COL); \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
data += i*numCols + (COL); \
data += i*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
*data++ *= (_Float16)v; \
} \
@ -196,16 +196,16 @@ extern "C"
#define MAC_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols; \
const int32_t nb = numCols-(COL); \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*numCols + (COL); \
dataB += j*numCols + (COL); \
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ += (_Float16)v * (_Float16)*dataB++;\
} \
@ -213,16 +213,16 @@ extern "C"
#define MAS_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _w; \
float16_t *dataA = (A)->pData; \
float16_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols; \
const int32_t nb = numCols-(COL); \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols-(COL); \
\
dataA += i*numCols + (COL); \
dataB += j*numCols + (COL); \
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ -= (_Float16)v * (_Float16)*dataB++;\
} \
@ -245,19 +245,19 @@ extern "C"
{ \
int cnt = ((A)->numCols)-(COL); \
float32_t *data = (A)->pData; \
const int32_t numCols = (A)->numCols; \
int32_t w; \
const int32_t _numCols = (A)->numCols; \
int32_t _w; \
\
for(w=(COL);w < numCols; w+=4) \
for(_w=(COL);_w < _numCols; _w+=4) \
{ \
f32x4_t tmpa,tmpb; \
mve_pred16_t p0 = vctp32q(cnt); \
\
tmpa=vldrwq_z_f32(&data[i*numCols + w],p0);\
tmpb=vldrwq_z_f32(&data[j*numCols + w],p0);\
tmpa=vldrwq_z_f32(&data[i*_numCols + _w],p0);\
tmpb=vldrwq_z_f32(&data[j*_numCols + _w],p0);\
\
vstrwq_p(&data[i*numCols + w], tmpb, p0); \
vstrwq_p(&data[j*numCols + w], tmpa, p0); \
vstrwq_p(&data[i*_numCols + _w], tmpb, p0); \
vstrwq_p(&data[j*_numCols + _w], tmpa, p0); \
\
cnt -= 4; \
} \
@ -268,17 +268,17 @@ extern "C"
int cnt = ((A)->numCols)-(COL); \
float32_t *dataA = (A)->pData; \
float32_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols; \
int32_t w; \
const int32_t _numCols = (A)->numCols; \
int32_t _w; \
\
for(w=(COL);w < numCols; w+=4) \
for(_w=(COL);_w < _numCols; _w+=4) \
{ \
f32x4_t tmpa,tmpb; \
mve_pred16_t p0 = vctp32q(cnt); \
tmpa = vldrwq_z_f32(&dataA[i*numCols + w],p0);\
tmpb = vldrwq_z_f32(&dataB[j*numCols + w],p0);\
tmpa = vldrwq_z_f32(&dataA[i*_numCols + _w],p0);\
tmpb = vldrwq_z_f32(&dataB[j*_numCols + _w],p0);\
tmpa = vfmaq_n_f32(tmpa,tmpb,v); \
vstrwq_p(&dataA[i*numCols + w], tmpa, p0); \
vstrwq_p(&dataA[i*_numCols + _w], tmpa, p0); \
cnt -= 4; \
} \
\
@ -289,18 +289,18 @@ extern "C"
int cnt = ((A)->numCols)-(COL); \
float32_t *dataA = (A)->pData; \
float32_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols; \
int32_t w; \
const int32_t _numCols = (A)->numCols; \
int32_t _w; \
f32x4_t vec=vdupq_n_f32(v); \
\
for(w=(COL);w < numCols; w+=4) \
for(_w=(COL);_w < _numCols; _w+=4) \
{ \
f32x4_t tmpa,tmpb; \
mve_pred16_t p0 = vctp32q(cnt); \
tmpa = vldrwq_z_f32(&dataA[i*numCols + w],p0);\
tmpb = vldrwq_z_f32(&dataB[j*numCols + w],p0);\
tmpa = vldrwq_z_f32(&dataA[i*_numCols + _w],p0);\
tmpb = vldrwq_z_f32(&dataB[j*_numCols + _w],p0);\
tmpa = vfmsq_f32(tmpa,tmpb,vec); \
vstrwq_p(&dataA[i*numCols + w], tmpa, p0); \
vstrwq_p(&dataA[i*_numCols + _w], tmpa, p0); \
cnt -= 4; \
} \
\
@ -310,16 +310,16 @@ extern "C"
{ \
int cnt = ((A)->numCols)-(COL); \
float32_t *data = (A)->pData; \
const int32_t numCols = (A)->numCols; \
int32_t w; \
const int32_t _numCols = (A)->numCols; \
int32_t _w; \
\
for(w=(COL);w < numCols; w+=4) \
for(_w=(COL);_w < _numCols; _w+=4) \
{ \
f32x4_t tmpa; \
mve_pred16_t p0 = vctp32q(cnt); \
tmpa = vldrwq_z_f32(&data[i*numCols + w],p0);\
tmpa = vldrwq_z_f32(&data[i*_numCols + _w],p0);\
tmpa = vmulq_n_f32(tmpa,v); \
vstrwq_p(&data[i*numCols + w], tmpa, p0); \
vstrwq_p(&data[i*_numCols + _w], tmpa, p0); \
cnt -= 4; \
} \
\
@ -329,18 +329,18 @@ extern "C"
#define SWAP_ROWS_F32(A,COL,i,j) \
{ \
int32_t w; \
int32_t _w; \
float32_t *dataI = (A)->pData; \
float32_t *dataJ = (A)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols - COL; \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols - COL; \
\
dataI += i*numCols + (COL); \
dataJ += j*numCols + (COL); \
dataI += i*_numCols + (COL); \
dataJ += j*_numCols + (COL); \
\
float32_t tmp; \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
tmp = *dataI; \
*dataI++ = *dataJ; \
@ -352,15 +352,15 @@ extern "C"
{ \
float32_t *dataA = (A)->pData; \
float32_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols - (COL); \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols - (COL); \
int32_t nbElems; \
f32x4_t vec = vdupq_n_f32(v); \
\
nbElems = nb >> 2; \
\
dataA += i*numCols + (COL); \
dataB += j*numCols + (COL); \
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
while(nbElems>0) \
{ \
@ -386,15 +386,15 @@ extern "C"
{ \
float32_t *dataA = (A)->pData; \
float32_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols - (COL); \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols - (COL); \
int32_t nbElems; \
f32x4_t vec = vdupq_n_f32(v); \
\
nbElems = nb >> 2; \
\
dataA += i*numCols + (COL); \
dataB += j*numCols + (COL); \
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
while(nbElems>0) \
{ \
@ -419,14 +419,14 @@ extern "C"
#define SCALE_ROW_F32(A,COL,v,i) \
{ \
float32_t *data = (A)->pData; \
const int32_t numCols = (A)->numCols; \
const int32_t nb = numCols - (COL); \
const int32_t _numCols = (A)->numCols; \
const int32_t nb = _numCols - (COL); \
int32_t nbElems; \
f32x4_t vec = vdupq_n_f32(v); \
\
nbElems = nb >> 2; \
\
data += i*numCols + (COL); \
data += i*_numCols + (COL); \
while(nbElems>0) \
{ \
f32x4_t tmpa; \
@ -450,18 +450,18 @@ extern "C"
#define SWAP_ROWS_F32(A,COL,i,j) \
{ \
int32_t w; \
int32_t _w; \
float32_t tmp; \
float32_t *dataI = (A)->pData; \
float32_t *dataJ = (A)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols - COL; \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols - COL; \
\
dataI += i*numCols + (COL); \
dataJ += j*numCols + (COL); \
dataI += i*_numCols + (COL); \
dataJ += j*_numCols + (COL); \
\
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
tmp = *dataI; \
*dataI++ = *dataJ; \
@ -471,14 +471,14 @@ extern "C"
#define SCALE_ROW_F32(A,COL,v,i) \
{ \
int32_t w; \
int32_t _w; \
float32_t *data = (A)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols - COL; \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols - COL; \
\
data += i*numCols + (COL); \
data += i*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
*data++ *= v; \
} \
@ -487,16 +487,16 @@ extern "C"
#define MAC_ROW_F32(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _w; \
float32_t *dataA = (A)->pData; \
float32_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols-(COL); \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
dataA = dataA + i*numCols + (COL); \
dataB = dataB + j*numCols + (COL); \
dataA = dataA + i*_numCols + (COL); \
dataB = dataB + j*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ += v* *dataB++; \
} \
@ -504,16 +504,16 @@ extern "C"
#define MAS_ROW_F32(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _w; \
float32_t *dataA = (A)->pData; \
float32_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols-(COL); \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
dataA = dataA + i*numCols + (COL); \
dataB = dataB + j*numCols + (COL); \
dataA = dataA + i*_numCols + (COL); \
dataB = dataB + j*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ -= v* *dataB++; \
} \
@ -522,7 +522,7 @@ extern "C"
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/* Functions with only a scalar version */
/* Functions with only a scalar version */
#define COPY_COL_F32(A,ROW,COL,DST) \
COPY_COL_T(float32_t,A,ROW,COL,DST)
@ -532,15 +532,15 @@ extern "C"
#define SWAP_COLS_F32(A,COL,i,j) \
{ \
int32_t w; \
int32_t _w; \
float32_t *data = (A)->pData; \
const int32_t numCols = (A)->numCols; \
for(w=(COL);w < numCols; w++) \
const int32_t _numCols = (A)->numCols; \
for(_w=(COL);_w < _numCols; _w++) \
{ \
float32_t tmp; \
tmp = data[w*numCols + i]; \
data[w*numCols + i] = data[w*numCols + j];\
data[w*numCols + j] = tmp; \
tmp = data[_w*_numCols + i]; \
data[_w*_numCols + i] = data[_w*_numCols + j];\
data[_w*_numCols + j] = tmp; \
} \
}
@ -549,16 +549,16 @@ extern "C"
#define SWAP_ROWS_F64(A,COL,i,j) \
{ \
int32_t w; \
int32_t _w; \
float64_t *dataI = (A)->pData; \
float64_t *dataJ = (A)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols-(COL); \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
dataI += i*numCols + (COL); \
dataJ += j*numCols + (COL); \
dataI += i*_numCols + (COL); \
dataJ += j*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
float64_t tmp; \
tmp = *dataI; \
@ -569,28 +569,28 @@ extern "C"
#define SWAP_COLS_F64(A,COL,i,j) \
{ \
int32_t w; \
int32_t _w; \
float64_t *data = (A)->pData; \
const int32_t numCols = (A)->numCols; \
for(w=(COL);w < numCols; w++) \
const int32_t _numCols = (A)->numCols; \
for(_w=(COL);_w < _numCols; _w++) \
{ \
float64_t tmp; \
tmp = data[w*numCols + i]; \
data[w*numCols + i] = data[w*numCols + j];\
data[w*numCols + j] = tmp; \
tmp = data[_w*_numCols + i]; \
data[_w*_numCols + i] = data[_w*_numCols + j];\
data[_w*_numCols + j] = tmp; \
} \
}
#define SCALE_ROW_F64(A,COL,v,i) \
{ \
int32_t w; \
int32_t _w; \
float64_t *data = (A)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols-(COL); \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
data += i*numCols + (COL); \
data += i*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
*data++ *= v; \
} \
@ -601,16 +601,16 @@ extern "C"
#define MAC_ROW_F64(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _w; \
float64_t *dataA = (A)->pData; \
float64_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols-(COL); \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
dataA += i*numCols + (COL); \
dataB += j*numCols + (COL); \
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ += v* *dataB++; \
} \
@ -618,16 +618,16 @@ extern "C"
#define MAS_ROW_F64(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _w; \
float64_t *dataA = (A)->pData; \
float64_t *dataB = (B)->pData; \
const int32_t numCols = (A)->numCols;\
const int32_t nb = numCols-(COL); \
const int32_t _numCols = (A)->numCols;\
const int32_t nb = _numCols-(COL); \
\
dataA += i*numCols + (COL); \
dataB += j*numCols + (COL); \
dataA += i*_numCols + (COL); \
dataB += j*_numCols + (COL); \
\
for(w=0;w < nb; w++) \
for(_w=0;_w < nb; _w++) \
{ \
*dataA++ -= v* *dataB++; \
} \

@ -59,7 +59,7 @@ float64_t arm_cosine_distance_f64(const float64_t *pA,const float64_t *pB, uint3
arm_dot_prod_f64(pA,pB,blockSize,&dot);
tmp = sqrt(pwra * pwrb);
return(1. - dot / tmp);
return(1.0L - dot / tmp);
}

@ -28,6 +28,7 @@
*/
#include "dsp/distance_functions.h"
#include "dsp/matrix_utils.h"
#include <limits.h>
#include <math.h>

@ -453,13 +453,19 @@ arm_status arm_conv_partial_q15(
}
/* Store the results in the accumulators in the destination buffer. */
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pOut, __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16));
write_q15x2_ia (&pOut, __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16));
{
int32_t sat0 = __SSAT((acc0 >> 15), 16);
int32_t sat1 = __SSAT((acc1 >> 15), 16);
int32_t sat2 = __SSAT((acc2 >> 15), 16);
int32_t sat3 = __SSAT((acc3 >> 15), 16);
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pOut, __PKHBT(sat0, sat1, 16));
write_q15x2_ia (&pOut, __PKHBT(sat2, sat3, 16));
#else
write_q15x2_ia (&pOut, __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16));
write_q15x2_ia (&pOut, __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
write_q15x2_ia (&pOut, __PKHBT(sat1, sat0, 16));
write_q15x2_ia (&pOut, __PKHBT(sat3, sat2, 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
}
/* Increment the pointer pIn1 index, count by 4 */
count += 4U;

@ -586,14 +586,19 @@ void arm_conv_q15(
}
/* Store the result in the accumulator in the destination buffer. */
{
int32_t sat0 = __SSAT((acc0 >> 15), 16);
int32_t sat1 = __SSAT((acc1 >> 15), 16);
int32_t sat2 = __SSAT((acc2 >> 15), 16);
int32_t sat3 = __SSAT((acc3 >> 15), 16);
#ifndef ARM_MATH_BIG_ENDIAN
write_q15x2_ia (&pOut, __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16));
write_q15x2_ia (&pOut, __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16));
write_q15x2_ia (&pOut, __PKHBT(sat0, sat1, 16));
write_q15x2_ia (&pOut, __PKHBT(sat2, sat3, 16));
#else
write_q15x2_ia (&pOut, __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16));
write_q15x2_ia (&pOut, __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16));
write_q15x2_ia (&pOut, __PKHBT(sat1, sat0, 16));
write_q15x2_ia (&pOut, __PKHBT(sat3, sat2, 16));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
}
/* Increment the pointer pIn1 index, count by 4 */
count += 4U;

@ -93,12 +93,12 @@ float64_t arm_householder_f64(
beta = alpha * alpha + x1norm2;
beta=sqrt(beta);
if (alpha > 0.0)
if (alpha > 0.0L)
{
beta = -beta;
}
r = 1.0 / (alpha -beta);
r = 1.0L / (alpha -beta);
arm_scale_f64(pOut,r,pOut,blockSize);
pOut[0] = 1.0;

@ -192,12 +192,12 @@ arm_status arm_mat_cholesky_f64(
pG[j * n + i] -= sum;
}
if (pG[i * n + i] <= 0.0)
if (pG[i * n + i] <= 0.0L)
{
return(ARM_MATH_DECOMPOSITION_FAILURE);
}
invSqrtVj = 1.0/sqrt(pG[i * n + i]);
invSqrtVj = 1.0L/sqrt(pG[i * n + i]);
SCALE_COL_F64(pDst,i,invSqrtVj,i);
}
@ -254,12 +254,12 @@ arm_status arm_mat_cholesky_f64(
}
}
if (pG[i * n + i] <= 0.0)
if (pG[i * n + i] <= 0.0L)
{
return(ARM_MATH_DECOMPOSITION_FAILURE);
}
invSqrtVj = 1.0/sqrt(pG[i * n + i]);
invSqrtVj = 1.0L/sqrt(pG[i * n + i]);
SCALE_COL_F64(pDst,i,invSqrtVj,i);
}

@ -61,7 +61,7 @@ arm_status arm_mat_inverse_f64(
uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */
float64_t pivot = 0.0, newPivot=0.0; /* Temporary input values */
float64_t pivot = 0.0L, newPivot=0.0L; /* Temporary input values */
uint32_t selectedRow,pivotRow,i, rowNb, rowCnt, flag = 0U, j,column; /* loop counters */
arm_status status; /* status of matrix inverse */
@ -182,7 +182,7 @@ arm_status arm_mat_inverse_f64(
/* Check if there is a non zero pivot element to
* replace in the rows below */
if ((pivot != 0.0) && (selectedRow != column))
if ((pivot != 0.0L) && (selectedRow != column))
{
/* Loop over number of columns
* to the right of the pilot element */
@ -198,14 +198,14 @@ arm_status arm_mat_inverse_f64(
/* Update the status if the matrix is singular */
if ((flag != 1U) && (pivot == 0.0))
if ((flag != 1U) && (pivot == 0.0L))
{
return ARM_MATH_SINGULAR;
}
/* Pivot element of the row */
pivot = 1.0 / pivot;
pivot = 1.0L / pivot;
SCALE_ROW_F64(pSrc,column,pivot,pivotRow);
SCALE_ROW_F64(pDst,0,pivot,pivotRow);
@ -241,12 +241,12 @@ arm_status arm_mat_inverse_f64(
/* Set status as ARM_MATH_SUCCESS */
status = ARM_MATH_SUCCESS;
if ((flag != 1U) && (pivot == 0.0))
if ((flag != 1U) && (pivot == 0.0L))
{
pIn = pSrc->pData;
for (i = 0; i < numRows * numCols; i++)
{
if (pIn[i] != 0.0)
if (pIn[i] != 0.0L)
break;
}

@ -365,7 +365,6 @@ arm_status arm_mat_ldlt_f32(
int r;
int w;
for(r=k;r<n;r++)
{
@ -394,7 +393,7 @@ arm_status arm_mat_ldlt_f32(
break;
}
for(w=k+1;w<n;w++)
for(int w=k+1;w<n;w++)
{
int x;
for(x=k+1;x<n;x++)
@ -403,7 +402,7 @@ arm_status arm_mat_ldlt_f32(
}
}
for(w=k+1;w<n;w++)
for(int w=k+1;w<n;w++)
{
pA[w*n+k] = pA[w*n+k] / a;
}

@ -102,7 +102,7 @@ arm_status arm_mat_ldlt_f64(
{
/* Find pivot */
float64_t m=F64_MIN,a;
int w,r,j=k;
int r,j=k;
for(r=k;r<n;r++)
@ -125,14 +125,14 @@ arm_status arm_mat_ldlt_f64(
a = pA[k*n+k];
if (fabs(a) < 1.0e-18)
if (fabs(a) < 1.0e-18L)
{
fullRank = 0;
break;
}
for(w=k+1;w<n;w++)
for(int w=k+1;w<n;w++)
{
int x;
for(x=k+1;x<n;x++)
@ -141,7 +141,7 @@ arm_status arm_mat_ldlt_f64(
}
}
for(w=k+1;w<n;w++)
for(int w=k+1;w<n;w++)
{
pA[w*n+k] = pA[w*n+k] / a;
}

@ -108,7 +108,7 @@ arm_status arm_mat_solve_lower_triangular_f64(
vecA = vfmsq_f64(vecA,vdupq_n_f64(pLT[n*i + k]),vecX);
}
if (pLT[n*i + i]==0.0)
if (pLT[n*i + i]==0.0L)
{
return(ARM_MATH_SINGULAR);
}
@ -131,7 +131,7 @@ arm_status arm_mat_solve_lower_triangular_f64(
tmp -= lt_row[k] * pX[cols*k+j];
}
if (lt_row[i]==0.0)
if (lt_row[i]==0.0L)
{
return(ARM_MATH_SINGULAR);
}
@ -206,7 +206,7 @@ arm_status arm_mat_solve_lower_triangular_f64(
tmp -= lt_row[k] * pX[cols*k+j];
}
if (lt_row[i]==0.0)
if (lt_row[i]==0.0L)
{
return(ARM_MATH_SINGULAR);
}

@ -100,7 +100,7 @@ arm_status arm_mat_solve_upper_triangular_f64(
vecA = vfmsq_f64(vecA,vdupq_n_f64(pUT[n*i + k]),vecX);
}
if (pUT[n*i + i]==0.0)
if (pUT[n*i + i]==0.0L)
{
return(ARM_MATH_SINGULAR);
}
@ -125,7 +125,7 @@ arm_status arm_mat_solve_upper_triangular_f64(
tmp -= ut_row[k] * pX[cols*k+j];
}
if (ut_row[i]==0.0)
if (ut_row[i]==0.0L)
{
return(ARM_MATH_SINGULAR);
}
@ -194,7 +194,7 @@ arm_status arm_mat_solve_upper_triangular_f64(
tmp -= ut_row[k] * pX[cols*k+j];
}
if (ut_row[i]==0.0)
if (ut_row[i]==0.0L)
{
return(ARM_MATH_SINGULAR);
}

@ -297,7 +297,7 @@ void arm_cfft_f64(
if (ifftFlag == 1U)
{
invL = 1.0 / (float64_t)L;
invL = 1.0L / (float64_t)L;
/* Conjugate and scale output data */
pSrc = p1;
for(l=0; l<L; l++)

@ -63,8 +63,8 @@ void stage_rfft_f64(
// real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
// imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
*pOut++ = 0.5 * ( t1a + t1b );
*pOut++ = 0.5 * ( t1a - t1b );
*pOut++ = 0.5L * ( t1a + t1b );
*pOut++ = 0.5L * ( t1a - t1b );
// XA(1) = 1/2*( U1 - imag(U2) + i*( U1 +imag(U2) ));
pB = p + 2*k;
@ -105,8 +105,8 @@ void stage_rfft_f64(
p2 = twR * t1b;
p3 = twI * t1b;
*pOut++ = 0.5 * (xAR + xBR + p0 + p3 ); //xAR
*pOut++ = 0.5 * (xAI - xBI + p1 - p2 ); //xAI
*pOut++ = 0.5L * (xAR + xBR + p0 + p3 ); //xAR
*pOut++ = 0.5L * (xAI - xBI + p1 - p2 ); //xAI
pA += 2;
pB -= 2;
@ -135,8 +135,8 @@ void merge_rfft_f64(
pCoeff += 2 ;
*pOut++ = 0.5 * ( xAR + xAI );
*pOut++ = 0.5 * ( xAR - xAI );
*pOut++ = 0.5L * ( xAR + xAI );
*pOut++ = 0.5L * ( xAR - xAI );
pB = p + 2*k ;
pA += 2 ;
@ -164,8 +164,8 @@ void merge_rfft_f64(
// real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
// imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
*pOut++ = 0.5 * (xAR + xBR - r - s ); //xAR
*pOut++ = 0.5 * (xAI - xBI + t - u ); //xAI
*pOut++ = 0.5L * (xAR + xBR - r - s ); //xAR
*pOut++ = 0.5L * (xAI - xBI + t - u ); //xAI
pA += 2;
pB -= 2;
