@ -42,29 +42,29 @@ extern "C"
/*
 * Scale the lower part of one matrix column in place.
 *
 *   T     element type of (A)->pData
 *   CAST  text placed directly in front of the scale factor, e.g. a cast
 *         such as (float)
 *   A     pointer to a matrix instance exposing numRows, numCols and pData
 *   ROW   first row to scale; rows ROW .. numRows-1 are touched
 *   v     scale factor
 *   i     column index
 *
 * The loop counter and the cached column count carry a leading underscore
 * so they cannot capture a caller argument named `w` or `numCols`.  Every
 * argument is parenthesized so that expressions such as `r + 1` expand with
 * the intended precedence.
 */
#define SCALE_COL_T(T,CAST,A,ROW,v,i)            \
{                                                \
  int32_t _w;                                    \
  T *data = (A)->pData;                          \
  const int32_t _numCols = (A)->numCols;         \
  const int32_t nb = (A)->numRows - (ROW);       \
                                                 \
  /* Move to element (ROW, i). */                \
  data += (i) + _numCols * (ROW);                \
                                                 \
  for (_w = 0; _w < nb; _w++)                    \
  {                                              \
    *data *= CAST (v);                           \
    /* Step down one row, same column. */        \
    data += _numCols;                            \
  }                                              \
}
/*
 * Copy the lower part of one matrix column into a contiguous buffer.
 *
 *   T    element type of (A)->pData and of DST
 *   A    pointer to a matrix instance exposing numRows, numCols and pData
 *   ROW  first row to copy; rows ROW .. numRows-1 are read
 *   COL  column index
 *   DST  destination buffer; receives numRows - ROW consecutive elements
 *
 * Locals carry a leading underscore so they cannot capture a caller
 * argument of the same name.  ROW, COL and DST are parenthesized so that
 * an expression argument (e.g. `k - 1`) is not split by the surrounding
 * multiplication and addition.
 */
#define COPY_COL_T(T,A,ROW,COL,DST)                          \
{                                                            \
  uint32_t _row;                                             \
  T *_pb = (DST);                                            \
  /* Start at element (ROW, COL). */                         \
  T *_pa = (A)->pData + (ROW) * (A)->numCols + (COL);        \
  for (_row = (ROW); _row < (A)->numRows; _row++)            \
  {                                                          \
    *_pb++ = *_pa;                                           \
    /* Next row, same column. */                             \
    _pa += (A)->numCols;                                     \
  }                                                          \
}
@ -74,20 +74,20 @@ extern "C"
# define SWAP_ROWS_F16(A,COL,i,j) \
{ \
int cnt = ( ( A ) - > numCols ) - ( COL ) ; \
int32_t w; \
int32_t _ w; \
float16_t * data = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t _ numCols = ( A ) - > numCols ; \
\
for ( w= ( COL ) ; w < numCols; w+ = 8 ) \
for ( _ w= ( COL ) ; _ w < _ numCols; _ w+ = 8 ) \
{ \
f16x8_t tmpa , tmpb ; \
mve_pred16_t p0 = vctp16q ( cnt ) ; \
\
tmpa = vldrhq_z_f16 ( & data [ i * numCols + w] , p0 ) ; \
tmpb = vldrhq_z_f16 ( & data [ j * numCols + w] , p0 ) ; \
tmpa = vldrhq_z_f16 ( & data [ i * _ numCols + _ w] , p0 ) ; \
tmpb = vldrhq_z_f16 ( & data [ j * _ numCols + _ w] , p0 ) ; \
\
vstrhq_p ( & data [ i * numCols + w] , tmpb , p0 ) ; \
vstrhq_p ( & data [ j * numCols + w] , tmpa , p0 ) ; \
vstrhq_p ( & data [ i * _ numCols + _ w] , tmpb , p0 ) ; \
vstrhq_p ( & data [ j * _ numCols + _ w] , tmpa , p0 ) ; \
\
cnt - = 8 ; \
} \
@ -96,17 +96,17 @@ extern "C"
# define SCALE_ROW_F16(A,COL,v,i) \
{ \
int cnt = ( ( A ) - > numCols ) - ( COL ) ; \
int32_t w; \
int32_t _ w; \
float16_t * data = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t _ numCols = ( A ) - > numCols ; \
\
for ( w= ( COL ) ; w < numCols; w+ = 8 ) \
for ( _ w= ( COL ) ; _ w < _ numCols; _ w+ = 8 ) \
{ \
f16x8_t tmpa ; \
mve_pred16_t p0 = vctp16q ( cnt ) ; \
tmpa = vldrhq_z_f16 ( & data [ i * numCols + w] , p0 ) ; \
tmpa = vldrhq_z_f16 ( & data [ i * _ numCols + _ w] , p0 ) ; \
tmpa = vmulq_n_f16 ( tmpa , ( _Float16 ) v ) ; \
vstrhq_p ( & data [ i * numCols + w] , tmpa , p0 ) ; \
vstrhq_p ( & data [ i * _ numCols + _ w] , tmpa , p0 ) ; \
cnt - = 8 ; \
} \
\
@ -115,19 +115,19 @@ extern "C"
# define MAC_ROW_F16(COL,A,i,v,B,j) \
{ \
int cnt = ( ( A ) - > numCols ) - ( COL ) ; \
int32_t w; \
int32_t _ w; \
float16_t * dataA = ( A ) - > pData ; \
float16_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t _ numCols = ( A ) - > numCols ; \
\
for ( w= ( COL ) ; w < numCols; w+ = 8 ) \
for ( _ w= ( COL ) ; _ w < _ numCols; _ w+ = 8 ) \
{ \
f16x8_t tmpa , tmpb ; \
mve_pred16_t p0 = vctp16q ( cnt ) ; \
tmpa = vldrhq_z_f16 ( & dataA [ i * numCols + w] , p0 ) ; \
tmpb = vldrhq_z_f16 ( & dataB [ j * numCols + w] , p0 ) ; \
tmpa = vldrhq_z_f16 ( & dataA [ i * _ numCols + _ w] , p0 ) ; \
tmpb = vldrhq_z_f16 ( & dataB [ j * _ numCols + _ w] , p0 ) ; \
tmpa = vfmaq_n_f16 ( tmpa , tmpb , v ) ; \
vstrhq_p ( & dataA [ i * numCols + w] , tmpa , p0 ) ; \
vstrhq_p ( & dataA [ i * _ numCols + _ w] , tmpa , p0 ) ; \
cnt - = 8 ; \
} \
\
@ -136,20 +136,20 @@ extern "C"
# define MAS_ROW_F16(COL,A,i,v,B,j) \
{ \
int cnt = ( ( A ) - > numCols ) - ( COL ) ; \
int32_t w; \
int32_t _ w; \
float16_t * dataA = ( A ) - > pData ; \
float16_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t _ numCols = ( A ) - > numCols ; \
f16x8_t vec = vdupq_n_f16 ( v ) ; \
\
for ( w= ( COL ) ; w < numCols; w+ = 8 ) \
for ( _ w= ( COL ) ; _ w < _ numCols; _ w+ = 8 ) \
{ \
f16x8_t tmpa , tmpb ; \
mve_pred16_t p0 = vctp16q ( cnt ) ; \
tmpa = vldrhq_z_f16 ( & dataA [ i * numCols + w] , p0 ) ; \
tmpb = vldrhq_z_f16 ( & dataB [ j * numCols + w] , p0 ) ; \
tmpa = vldrhq_z_f16 ( & dataA [ i * _ numCols + _ w] , p0 ) ; \
tmpb = vldrhq_z_f16 ( & dataB [ j * _ numCols + _ w] , p0 ) ; \
tmpa = vfmsq_f16 ( tmpa , tmpb , vec ) ; \
vstrhq_p ( & dataA [ i * numCols + w] , tmpa , p0 ) ; \
vstrhq_p ( & dataA [ i * _ numCols + _ w] , tmpa , p0 ) ; \
cnt - = 8 ; \
} \
\
@ -160,16 +160,16 @@ extern "C"
# define SWAP_ROWS_F16(A,COL,i,j) \
{ \
int32_t w; \
int32_t _ w; \
float16_t * dataI = ( A ) - > pData ; \
float16_t * dataJ = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols- ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols- ( COL ) ; \
\
dataI + = i * numCols + ( COL ) ; \
dataJ + = j * numCols + ( COL ) ; \
dataI + = i * _ numCols + ( COL ) ; \
dataJ + = j * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
float16_t tmp ; \
tmp = * dataI ; \
@ -180,14 +180,14 @@ extern "C"
# define SCALE_ROW_F16(A,COL,v,i) \
{ \
int32_t w; \
int32_t _ w; \
float16_t * data = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols- ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols- ( COL ) ; \
\
data + = i * numCols + ( COL ) ; \
data + = i * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
* data + + * = ( _Float16 ) v ; \
} \
@ -196,16 +196,16 @@ extern "C"
# define MAC_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _ w; \
float16_t * dataA = ( A ) - > pData ; \
float16_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols- ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols- ( COL ) ; \
\
dataA + = i * numCols + ( COL ) ; \
dataB + = j * numCols + ( COL ) ; \
dataA + = i * _ numCols + ( COL ) ; \
dataB + = j * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
* dataA + + + = ( _Float16 ) v * ( _Float16 ) * dataB + + ; \
} \
@ -213,16 +213,16 @@ extern "C"
# define MAS_ROW_F16(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _ w; \
float16_t * dataA = ( A ) - > pData ; \
float16_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols- ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols- ( COL ) ; \
\
dataA + = i * numCols + ( COL ) ; \
dataB + = j * numCols + ( COL ) ; \
dataA + = i * _ numCols + ( COL ) ; \
dataB + = j * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
* dataA + + - = ( _Float16 ) v * ( _Float16 ) * dataB + + ; \
} \
@ -245,19 +245,19 @@ extern "C"
{ \
int cnt = ( ( A ) - > numCols ) - ( COL ) ; \
float32_t * data = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
int32_t w; \
const int32_t _ numCols = ( A ) - > numCols ; \
int32_t _ w; \
\
for ( w= ( COL ) ; w < numCols; w+ = 4 ) \
for ( _ w= ( COL ) ; _ w < _ numCols; _ w+ = 4 ) \
{ \
f32x4_t tmpa , tmpb ; \
mve_pred16_t p0 = vctp32q ( cnt ) ; \
\
tmpa = vldrwq_z_f32 ( & data [ i * numCols + w] , p0 ) ; \
tmpb = vldrwq_z_f32 ( & data [ j * numCols + w] , p0 ) ; \
tmpa = vldrwq_z_f32 ( & data [ i * _ numCols + _ w] , p0 ) ; \
tmpb = vldrwq_z_f32 ( & data [ j * _ numCols + _ w] , p0 ) ; \
\
vstrwq_p ( & data [ i * numCols + w] , tmpb , p0 ) ; \
vstrwq_p ( & data [ j * numCols + w] , tmpa , p0 ) ; \
vstrwq_p ( & data [ i * _ numCols + _ w] , tmpb , p0 ) ; \
vstrwq_p ( & data [ j * _ numCols + _ w] , tmpa , p0 ) ; \
\
cnt - = 4 ; \
} \
@ -268,17 +268,17 @@ extern "C"
int cnt = ( ( A ) - > numCols ) - ( COL ) ; \
float32_t * dataA = ( A ) - > pData ; \
float32_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
int32_t w; \
const int32_t _ numCols = ( A ) - > numCols ; \
int32_t _ w; \
\
for ( w= ( COL ) ; w < numCols; w+ = 4 ) \
for ( _ w= ( COL ) ; _ w < _ numCols; _ w+ = 4 ) \
{ \
f32x4_t tmpa , tmpb ; \
mve_pred16_t p0 = vctp32q ( cnt ) ; \
tmpa = vldrwq_z_f32 ( & dataA [ i * numCols + w] , p0 ) ; \
tmpb = vldrwq_z_f32 ( & dataB [ j * numCols + w] , p0 ) ; \
tmpa = vldrwq_z_f32 ( & dataA [ i * _ numCols + _ w] , p0 ) ; \
tmpb = vldrwq_z_f32 ( & dataB [ j * _ numCols + _ w] , p0 ) ; \
tmpa = vfmaq_n_f32 ( tmpa , tmpb , v ) ; \
vstrwq_p ( & dataA [ i * numCols + w] , tmpa , p0 ) ; \
vstrwq_p ( & dataA [ i * _ numCols + _ w] , tmpa , p0 ) ; \
cnt - = 4 ; \
} \
\
@ -289,18 +289,18 @@ extern "C"
int cnt = ( ( A ) - > numCols ) - ( COL ) ; \
float32_t * dataA = ( A ) - > pData ; \
float32_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
int32_t w; \
const int32_t _ numCols = ( A ) - > numCols ; \
int32_t _ w; \
f32x4_t vec = vdupq_n_f32 ( v ) ; \
\
for ( w= ( COL ) ; w < numCols; w+ = 4 ) \
for ( _ w= ( COL ) ; _ w < _ numCols; _ w+ = 4 ) \
{ \
f32x4_t tmpa , tmpb ; \
mve_pred16_t p0 = vctp32q ( cnt ) ; \
tmpa = vldrwq_z_f32 ( & dataA [ i * numCols + w] , p0 ) ; \
tmpb = vldrwq_z_f32 ( & dataB [ j * numCols + w] , p0 ) ; \
tmpa = vldrwq_z_f32 ( & dataA [ i * _ numCols + _ w] , p0 ) ; \
tmpb = vldrwq_z_f32 ( & dataB [ j * _ numCols + _ w] , p0 ) ; \
tmpa = vfmsq_f32 ( tmpa , tmpb , vec ) ; \
vstrwq_p ( & dataA [ i * numCols + w] , tmpa , p0 ) ; \
vstrwq_p ( & dataA [ i * _ numCols + _ w] , tmpa , p0 ) ; \
cnt - = 4 ; \
} \
\
@ -310,16 +310,16 @@ extern "C"
{ \
int cnt = ( ( A ) - > numCols ) - ( COL ) ; \
float32_t * data = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
int32_t w; \
const int32_t _ numCols = ( A ) - > numCols ; \
int32_t _ w; \
\
for ( w= ( COL ) ; w < numCols; w+ = 4 ) \
for ( _ w= ( COL ) ; _ w < _ numCols; _ w+ = 4 ) \
{ \
f32x4_t tmpa ; \
mve_pred16_t p0 = vctp32q ( cnt ) ; \
tmpa = vldrwq_z_f32 ( & data [ i * numCols + w] , p0 ) ; \
tmpa = vldrwq_z_f32 ( & data [ i * _ numCols + _ w] , p0 ) ; \
tmpa = vmulq_n_f32 ( tmpa , v ) ; \
vstrwq_p ( & data [ i * numCols + w] , tmpa , p0 ) ; \
vstrwq_p ( & data [ i * _ numCols + _ w] , tmpa , p0 ) ; \
cnt - = 4 ; \
} \
\
@ -329,18 +329,18 @@ extern "C"
# define SWAP_ROWS_F32(A,COL,i,j) \
{ \
int32_t w; \
int32_t _ w; \
float32_t * dataI = ( A ) - > pData ; \
float32_t * dataJ = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols - COL ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols - COL ; \
\
dataI + = i * numCols + ( COL ) ; \
dataJ + = j * numCols + ( COL ) ; \
dataI + = i * _ numCols + ( COL ) ; \
dataJ + = j * _ numCols + ( COL ) ; \
\
float32_t tmp ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
tmp = * dataI ; \
* dataI + + = * dataJ ; \
@ -352,15 +352,15 @@ extern "C"
{ \
float32_t * dataA = ( A ) - > pData ; \
float32_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols - ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols - ( COL ) ; \
int32_t nbElems ; \
f32x4_t vec = vdupq_n_f32 ( v ) ; \
\
nbElems = nb > > 2 ; \
\
dataA + = i * numCols + ( COL ) ; \
dataB + = j * numCols + ( COL ) ; \
dataA + = i * _ numCols + ( COL ) ; \
dataB + = j * _ numCols + ( COL ) ; \
\
while ( nbElems > 0 ) \
{ \
@ -386,15 +386,15 @@ extern "C"
{ \
float32_t * dataA = ( A ) - > pData ; \
float32_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols - ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols - ( COL ) ; \
int32_t nbElems ; \
f32x4_t vec = vdupq_n_f32 ( v ) ; \
\
nbElems = nb > > 2 ; \
\
dataA + = i * numCols + ( COL ) ; \
dataB + = j * numCols + ( COL ) ; \
dataA + = i * _ numCols + ( COL ) ; \
dataB + = j * _ numCols + ( COL ) ; \
\
while ( nbElems > 0 ) \
{ \
@ -419,14 +419,14 @@ extern "C"
# define SCALE_ROW_F32(A,COL,v,i) \
{ \
float32_t * data = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols - ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols - ( COL ) ; \
int32_t nbElems ; \
f32x4_t vec = vdupq_n_f32 ( v ) ; \
\
nbElems = nb > > 2 ; \
\
data + = i * numCols + ( COL ) ; \
data + = i * _ numCols + ( COL ) ; \
while ( nbElems > 0 ) \
{ \
f32x4_t tmpa ; \
@ -450,18 +450,18 @@ extern "C"
# define SWAP_ROWS_F32(A,COL,i,j) \
{ \
int32_t w; \
int32_t _ w; \
float32_t tmp ; \
float32_t * dataI = ( A ) - > pData ; \
float32_t * dataJ = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols - COL ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols - COL ; \
\
dataI + = i * numCols + ( COL ) ; \
dataJ + = j * numCols + ( COL ) ; \
dataI + = i * _ numCols + ( COL ) ; \
dataJ + = j * _ numCols + ( COL ) ; \
\
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
tmp = * dataI ; \
* dataI + + = * dataJ ; \
@ -471,14 +471,14 @@ extern "C"
# define SCALE_ROW_F32(A,COL,v,i) \
{ \
int32_t w; \
int32_t _ w; \
float32_t * data = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols - COL ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols - COL ; \
\
data + = i * numCols + ( COL ) ; \
data + = i * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
* data + + * = v ; \
} \
@ -487,16 +487,16 @@ extern "C"
# define MAC_ROW_F32(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _ w; \
float32_t * dataA = ( A ) - > pData ; \
float32_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols- ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols- ( COL ) ; \
\
dataA = dataA + i * numCols + ( COL ) ; \
dataB = dataB + j * numCols + ( COL ) ; \
dataA = dataA + i * _ numCols + ( COL ) ; \
dataB = dataB + j * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
* dataA + + + = v * * dataB + + ; \
} \
@ -504,16 +504,16 @@ extern "C"
# define MAS_ROW_F32(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _ w; \
float32_t * dataA = ( A ) - > pData ; \
float32_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols- ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols- ( COL ) ; \
\
dataA = dataA + i * numCols + ( COL ) ; \
dataB = dataB + j * numCols + ( COL ) ; \
dataA = dataA + i * _ numCols + ( COL ) ; \
dataB = dataB + j * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
* dataA + + - = v * * dataB + + ; \
} \
@ -522,7 +522,7 @@ extern "C"
# endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/* Functions with only a scalar version */
/* float32_t instantiation of COPY_COL_T: forwards A, ROW, COL and DST unchanged. */
#define COPY_COL_F32(A,ROW,COL,DST) \
  COPY_COL_T(float32_t,A,ROW,COL,DST)
@ -532,15 +532,15 @@ extern "C"
/*
 * Swap columns i and j of a float32_t matrix in place, for rows COL and up.
 *
 *   A    pointer to a matrix instance exposing numCols and pData
 *   COL  first row whose two elements are exchanged
 *   i,j  the two column indices
 *
 * The row loop runs from COL up to numCols (not numRows).
 * NOTE(review): this presumably expects a square matrix - confirm against
 * the callers.
 *
 * The loop counter and the cached column count carry a leading underscore
 * so they cannot capture a caller argument named `w` or `numCols`.
 */
#define SWAP_COLS_F32(A,COL,i,j)                                   \
{                                                                  \
  int32_t _w;                                                      \
  float32_t *data = (A)->pData;                                    \
  const int32_t _numCols = (A)->numCols;                           \
  for (_w = (COL); _w < _numCols; _w++)                            \
  {                                                                \
    float32_t tmp;                                                 \
    tmp = data[_w * _numCols + (i)];                               \
    data[_w * _numCols + (i)] = data[_w * _numCols + (j)];         \
    data[_w * _numCols + (j)] = tmp;                               \
  }                                                                \
}
@ -549,16 +549,16 @@ extern "C"
# define SWAP_ROWS_F64(A,COL,i,j) \
{ \
int32_t w; \
int32_t _ w; \
float64_t * dataI = ( A ) - > pData ; \
float64_t * dataJ = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols- ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols- ( COL ) ; \
\
dataI + = i * numCols + ( COL ) ; \
dataJ + = j * numCols + ( COL ) ; \
dataI + = i * _ numCols + ( COL ) ; \
dataJ + = j * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
float64_t tmp ; \
tmp = * dataI ; \
@ -569,28 +569,28 @@ extern "C"
/*
 * Swap columns i and j of a float64_t matrix in place, for rows COL and up.
 *
 *   A    pointer to a matrix instance exposing numCols and pData
 *   COL  first row whose two elements are exchanged
 *   i,j  the two column indices
 *
 * The row loop runs from COL up to numCols (not numRows).
 * NOTE(review): this presumably expects a square matrix - confirm against
 * the callers.
 *
 * The loop counter and the cached column count carry a leading underscore
 * so they cannot capture a caller argument named `w` or `numCols`.
 */
#define SWAP_COLS_F64(A,COL,i,j)                                   \
{                                                                  \
  int32_t _w;                                                      \
  float64_t *data = (A)->pData;                                    \
  const int32_t _numCols = (A)->numCols;                           \
  for (_w = (COL); _w < _numCols; _w++)                            \
  {                                                                \
    float64_t tmp;                                                 \
    tmp = data[_w * _numCols + (i)];                               \
    data[_w * _numCols + (i)] = data[_w * _numCols + (j)];         \
    data[_w * _numCols + (j)] = tmp;                               \
  }                                                                \
}
# define SCALE_ROW_F64(A,COL,v,i) \
{ \
int32_t w; \
int32_t _ w; \
float64_t * data = ( A ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols- ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols- ( COL ) ; \
\
data + = i * numCols + ( COL ) ; \
data + = i * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
* data + + * = v ; \
} \
@ -601,16 +601,16 @@ extern "C"
# define MAC_ROW_F64(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _ w; \
float64_t * dataA = ( A ) - > pData ; \
float64_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols- ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols- ( COL ) ; \
\
dataA + = i * numCols + ( COL ) ; \
dataB + = j * numCols + ( COL ) ; \
dataA + = i * _ numCols + ( COL ) ; \
dataB + = j * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
* dataA + + + = v * * dataB + + ; \
} \
@ -618,16 +618,16 @@ extern "C"
# define MAS_ROW_F64(COL,A,i,v,B,j) \
{ \
int32_t w; \
int32_t _ w; \
float64_t * dataA = ( A ) - > pData ; \
float64_t * dataB = ( B ) - > pData ; \
const int32_t numCols = ( A ) - > numCols ; \
const int32_t nb = numCols- ( COL ) ; \
const int32_t _ numCols = ( A ) - > numCols ; \
const int32_t nb = _ numCols- ( COL ) ; \
\
dataA + = i * numCols + ( COL ) ; \
dataB + = j * numCols + ( COL ) ; \
dataA + = i * _ numCols + ( COL ) ; \
dataB + = j * _ numCols + ( COL ) ; \
\
for ( w= 0 ; w < nb ; w+ + ) \
for ( _ w= 0 ; _ w < nb ; _ w+ + ) \
{ \
* dataA + + - = v * * dataB + + ; \
} \