CMSIS-DSP: Added new matrix functions and their MVE implementation

arm_mat_cmplx_trans_f32.c
arm_mat_cmplx_trans_q15.c
arm_mat_cmplx_trans_q31.c
pull/19/head
Christophe Favergeon 6 years ago
parent 8268b079d5
commit a20e215bfc

@ -231,6 +231,172 @@ __STATIC_INLINE arm_status arm_mat_trans_32bit_generic_mve(
return (ARM_MATH_SUCCESS);
}
/**
 * @brief  Transpose a matrix of complex values made of 32-bit words (MVE).
 *
 * Every complex element occupies CMPLX_DIM consecutive 32-bit words in both
 * the source and the destination. One destination row (i.e. one source
 * column) is produced per outer iteration with gather loads.
 *
 * @param[in]  srcRows    number of rows of the source matrix
 * @param[in]  srcCols    number of columns of the source matrix
 * @param[in]  pDataSrc   source data
 * @param[in]  dstRows    number of rows of the destination matrix
 *                        (only read when ARM_MATH_MATRIX_CHECK is defined)
 * @param[in]  dstCols    number of columns of the destination matrix
 *                        (only read when ARM_MATH_MATRIX_CHECK is defined)
 * @param[out] pDataDest  destination data
 * @return     ARM_MATH_SIZE_MISMATCH when the size check is enabled and
 *             fails, ARM_MATH_SUCCESS otherwise
 */
__STATIC_INLINE arm_status arm_mat_cmplx_trans_32bit(
    uint16_t    srcRows,
    uint16_t    srcCols,
    uint32_t   *pDataSrc,
    uint16_t    dstRows,
    uint16_t    dstCols,
    uint32_t   *pDataDest)
{
    uint32_t        i;
    uint32_t const *pDataC;
    uint32_t       *pDataRow;
    uint32_t       *pDataDestR, *pDataDestRow;
    uint32x4_t      vecOffsRef, vecOffsCur;
    uint32_t        blkCnt;
    uint32x4_t      vecIn;

#ifdef ARM_MATH_MATRIX_CHECK
    /*
     * Check for matrix mismatch condition
     */
    if ((srcRows != dstCols) || (srcCols != dstRows))
    {
        return (ARM_MATH_SIZE_MISMATCH);
    }
#else
    (void)dstRows;
    (void)dstCols;
#endif

    /*
     * The column loop below is a do/while that pre-decrements its counter,
     * so a zero column count would wrap around instead of terminating.
     */
    if ((srcRows == 0U) || (srcCols == 0U))
    {
        return (ARM_MATH_SUCCESS);
    }

    /* 2x2, 3x3 and 4x4 specialization to be added */

    /*
     * Word offsets of two vertically adjacent complex elements:
     * [re(r), im(r), re(r + 1), im(r + 1)]
     */
    vecOffsRef[0] = 0;
    vecOffsRef[1] = 1;
    vecOffsRef[2] = srcCols << 1;
    vecOffsRef[3] = (srcCols << 1) + 1;

    pDataRow = pDataSrc;
    pDataDestRow = pDataDest;
    i = srcCols;
    do
    {
        pDataC = (uint32_t const *) pDataRow;
        pDataDestR = pDataDestRow;
        vecOffsCur = vecOffsRef;

        /* Full vectors: 4 words, i.e. 2 complex elements, per iteration */
        blkCnt = (srcRows * CMPLX_DIM) >> 2;
        while (blkCnt > 0U)
        {
            vecIn = vldrwq_gather_shifted_offset(pDataC, vecOffsCur);
            vstrwq(pDataDestR, vecIn);
            pDataDestR += 4;
            /* Move down by two source rows (2 * srcCols * CMPLX_DIM words) */
            vecOffsCur = vaddq(vecOffsCur, (srcCols << 2));
            /*
             * Decrement the blockSize loop counter
             */
            blkCnt--;
        }

        /*
         * tail
         * (will be merged thru tail predication)
         */
        blkCnt = (srcRows * CMPLX_DIM) & 3;
        if (blkCnt > 0U)
        {
            mve_pred16_t p0 = vctp32q(blkCnt);

            /*
             * Predicate the load as well as the store: the inactive lanes
             * would otherwise address a row past the end of the source.
             */
            vecIn = vldrwq_gather_shifted_offset_z(pDataC, vecOffsCur, p0);
            vstrwq_p(pDataDestR, vecIn, p0);
        }

        /* Next source column / next destination row */
        pDataRow += CMPLX_DIM;
        pDataDestRow += (srcRows * CMPLX_DIM);
    }
    while (--i);

    return (ARM_MATH_SUCCESS);
}
/**
 * @brief  Transpose a matrix of complex values made of 16-bit halfwords (MVE).
 *
 * Every complex element occupies CMPLX_DIM consecutive 16-bit values in both
 * the source and the destination. One destination row (i.e. one source
 * column) is produced per outer iteration with gather loads.
 *
 * NOTE(review): the gather offsets are kept in 16-bit lanes, so they wrap
 * once an element offset no longer fits in 16 bits -- confirm the supported
 * matrix sizes with the callers.
 *
 * @param[in]  srcRows    number of rows of the source matrix
 * @param[in]  srcCols    number of columns of the source matrix
 * @param[in]  pDataSrc   source data
 * @param[in]  dstRows    number of rows of the destination matrix
 *                        (only read when ARM_MATH_MATRIX_CHECK is defined)
 * @param[in]  dstCols    number of columns of the destination matrix
 *                        (only read when ARM_MATH_MATRIX_CHECK is defined)
 * @param[out] pDataDest  destination data
 * @return     ARM_MATH_SIZE_MISMATCH when the size check is enabled and
 *             fails, ARM_MATH_SUCCESS otherwise
 */
__STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit(
    uint16_t    srcRows,
    uint16_t    srcCols,
    uint16_t   *pDataSrc,
    uint16_t    dstRows,
    uint16_t    dstCols,
    uint16_t   *pDataDest)
{
    static const uint16_t loadCmplxCol[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
    int             i;
    uint16x8_t      vecOffsRef, vecOffsCur;
    uint16_t const *pDataC;
    uint16_t       *pDataRow;
    uint16_t       *pDataDestR, *pDataDestRow;
    uint32_t        blkCnt;
    uint16x8_t      vecIn;

#ifdef ARM_MATH_MATRIX_CHECK
    /*
     * Check for matrix mismatch condition
     */
    if ((srcRows != dstCols) || (srcCols != dstRows))
    {
        return (ARM_MATH_SIZE_MISMATCH);
    }
#else
    (void)dstRows;
    (void)dstCols;
#endif

    /*
     * The column loop below is a do/while that pre-decrements its counter,
     * so a zero column count would not terminate as intended.
     */
    if ((srcRows == 0U) || (srcCols == 0U))
    {
        return (ARM_MATH_SUCCESS);
    }

    /*
     * 2x2, 3x3 and 4x4 specialization to be added
     */

    /*
     * build [0, 1, 2xcol, 2xcol+1, 4xcol, 4xcol+1, 6xcol, 6xcol+1]
     */
    vecOffsRef = vldrhq_u16((uint16_t const *) loadCmplxCol);
    vecOffsRef = vmulq(vecOffsRef, (uint16_t) (srcCols * CMPLX_DIM))
                    + viwdupq_u16((uint32_t)0, (uint16_t) 2, 1);

    pDataRow = pDataSrc;
    pDataDestRow = pDataDest;
    i = srcCols;
    do
    {
        pDataC = (uint16_t const *) pDataRow;
        pDataDestR = pDataDestRow;
        vecOffsCur = vecOffsRef;

        /* Full vectors: 8 halfwords, i.e. 4 complex elements, per iteration */
        blkCnt = (srcRows * CMPLX_DIM) >> 3;
        while (blkCnt > 0U)
        {
            vecIn = vldrhq_gather_shifted_offset(pDataC, vecOffsCur);
            vstrhq(pDataDestR, vecIn);
            pDataDestR += 8; /* VEC_LANES_U16 */
            /* Move down by four source rows (4 * srcCols * CMPLX_DIM) */
            vecOffsCur = vaddq(vecOffsCur, (srcCols << 3));
            /*
             * Decrement the blockSize loop counter
             */
            blkCnt--;
        }

        /*
         * tail
         * (will be merged thru tail predication)
         */
        blkCnt = (srcRows * CMPLX_DIM) & 0x7;
        if (blkCnt > 0U)
        {
            mve_pred16_t p0 = vctp16q(blkCnt);

            /*
             * Predicate the load as well as the store: the inactive lanes
             * would otherwise address rows past the end of the source.
             */
            vecIn = vldrhq_gather_shifted_offset_z(pDataC, vecOffsCur, p0);
            vstrhq_p(pDataDestR, vecIn, p0);
        }

        /* Next source column / next destination row */
        pDataRow += CMPLX_DIM;
        pDataDestRow += (srcRows * CMPLX_DIM);
    }
    while (--i);

    return (ARM_MATH_SUCCESS);
}
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE)
__STATIC_INLINE q31x4_t FAST_VSQRT_Q31(q31x4_t vecIn)
{

@ -2502,6 +2502,18 @@ arm_status arm_mat_trans_f32(
const arm_matrix_instance_f32 * pSrc,
arm_matrix_instance_f32 * pDst);
/**
 * @brief Floating-point complex matrix transpose.
 *
 * Each matrix element is a complex value; the implementation moves two
 * consecutive float32_t values (real part, then imaginary part) per element.
 * @param[in] pSrc points to the input matrix
 * @param[out] pDst points to the output matrix
 * @return The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
 * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 * @note The dimensions of pDst must be those of pSrc swapped; this is only
 * verified when ARM_MATH_MATRIX_CHECK is defined.
 */
arm_status arm_mat_cmplx_trans_f32(
const arm_matrix_instance_f32 * pSrc,
arm_matrix_instance_f32 * pDst);
/**
* @brief Q15 matrix transpose.
* @param[in] pSrc points to the input matrix
@ -2513,6 +2525,17 @@ arm_status arm_mat_trans_q15(
const arm_matrix_instance_q15 * pSrc,
arm_matrix_instance_q15 * pDst);
/**
 * @brief Q15 complex matrix transpose.
 *
 * Each matrix element is a complex value; the implementation moves two
 * consecutive q15_t values (real part, then imaginary part) per element.
 * @param[in] pSrc points to the input matrix
 * @param[out] pDst points to the output matrix
 * @return The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
 * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 * @note The dimensions of pDst must be those of pSrc swapped; this is only
 * verified when ARM_MATH_MATRIX_CHECK is defined.
 */
arm_status arm_mat_cmplx_trans_q15(
const arm_matrix_instance_q15 * pSrc,
arm_matrix_instance_q15 * pDst);
/**
* @brief Q7 matrix transpose.
* @param[in] pSrc points to the input matrix
@ -2535,6 +2558,17 @@ arm_status arm_mat_trans_q31(
const arm_matrix_instance_q31 * pSrc,
arm_matrix_instance_q31 * pDst);
/**
 * @brief Q31 complex matrix transpose.
 *
 * Each matrix element is a complex value; the implementation moves two
 * consecutive q31_t values (real part, then imaginary part) per element.
 * @param[in] pSrc points to the input matrix
 * @param[out] pDst points to the output matrix
 * @return The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
 * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 * @note The dimensions of pDst must be those of pSrc swapped; this is only
 * verified when ARM_MATH_MATRIX_CHECK is defined.
 */
arm_status arm_mat_cmplx_trans_q31(
const arm_matrix_instance_q31 * pSrc,
arm_matrix_instance_q31 * pDst);
/**
* @brief Floating-point matrix multiplication
* @param[in] pSrcA points to the first input matrix structure

@ -5,9 +5,31 @@ project(CMSISDSPMatrix)
include(configLib)
include(configDsp)
file(GLOB SRC "./*_*.c")
add_library(CMSISDSPMatrix STATIC ${SRC})
# Collect the sources separately for every datatype suffix so that each
# datatype can be added to the library on its own.
file(GLOB SRCF64 "./*_f64.c")
file(GLOB SRCF32 "./*_f32.c")
file(GLOB SRCF16 "./*_f16.c")
file(GLOB SRCQ31 "./*_q31.c")
file(GLOB SRCQ15 "./*_q15.c")
file(GLOB SRCQ7 "./*_q7.c")
file(GLOB SRCU32 "./*_u32.c")
file(GLOB SRCU16 "./*_u16.c")
file(GLOB SRCU8 "./*_u8.c")
# The library is created from the f64 sources; the other datatypes are
# appended with target_sources.
add_library(CMSISDSPMatrix STATIC ${SRCF64})
target_sources(CMSISDSPMatrix PRIVATE ${SRCF32})
# The f16 sources are only added when neither ARMAC5 nor DISABLEFLOAT16 is set.
if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
target_sources(CMSISDSPMatrix PRIVATE ${SRCF16})
endif()
target_sources(CMSISDSPMatrix PRIVATE ${SRCQ31})
target_sources(CMSISDSPMatrix PRIVATE ${SRCQ15})
target_sources(CMSISDSPMatrix PRIVATE ${SRCQ7})
target_sources(CMSISDSPMatrix PRIVATE ${SRCU32})
target_sources(CMSISDSPMatrix PRIVATE ${SRCU16})
target_sources(CMSISDSPMatrix PRIVATE ${SRCU8})
configLib(CMSISDSPMatrix ${ROOT})
configDsp(CMSISDSPMatrix ${ROOT})

@ -57,3 +57,7 @@
#include "arm_mat_vec_mult_q31.c"
#include "arm_mat_vec_mult_q15.c"
#include "arm_mat_vec_mult_q7.c"
#include "arm_mat_cmplx_trans_f32.c"
#include "arm_mat_cmplx_trans_q31.c"
#include "arm_mat_cmplx_trans_q15.c"

@ -0,0 +1,133 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mat_cmplx_trans_f32.c
* Description: Floating-point complex matrix transpose
*
* $Date: 08. July 2020
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMatrix
*/
/**
@defgroup MatrixTrans Matrix Transpose
Transposes a matrix.
Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
\image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
*/
/**
@addtogroup MatrixTrans
@{
*/
/**
@brief Floating-point complex matrix transpose.
@param[in] pSrc points to input matrix
@param[out] pDst points to output matrix
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
arm_status arm_mat_cmplx_trans_f32(const arm_matrix_instance_f32 * pSrc, arm_matrix_instance_f32 * pDst)
{
    /*
     * The transpose only moves data, so the float32 samples are handed to
     * the generic 32-bit helper as plain 32-bit words.
     */
    uint32_t *srcWords = (uint32_t *) pSrc->pData;
    uint32_t *dstWords = (uint32_t *) pDst->pData;

    return arm_mat_cmplx_trans_32bit(pSrc->numRows, pSrc->numCols, srcWords,
                                     pDst->numRows, pDst->numCols, dstWords);
}
#else
/*
 * Scalar floating-point complex matrix transpose.
 *
 * Every element is CMPLX_DIM consecutive float32_t values (real part, then
 * imaginary part). The source is read sequentially, row by row, and each
 * element is written to the swapped (column, row) position of the output.
 *
 * A matrix with zero rows or zero columns is left untouched and reported as
 * ARM_MATH_SUCCESS.
 *
 * Returns ARM_MATH_SIZE_MISMATCH when ARM_MATH_MATRIX_CHECK is defined and
 * the dimensions of pDst are not those of pSrc swapped, ARM_MATH_SUCCESS
 * otherwise.
 */
arm_status arm_mat_cmplx_trans_f32(
  const arm_matrix_instance_f32 * pSrc,
        arm_matrix_instance_f32 * pDst)
{
  const float32_t *pIn = pSrc->pData;       /* input data matrix pointer */
  float32_t *pOut = pDst->pData;            /* output data matrix pointer */
  float32_t *px;                            /* write position in the output */
  uint32_t nRows = pSrc->numRows;           /* number of rows */
  uint32_t nColumns = pSrc->numCols;        /* number of columns */
  uint32_t row, col;                        /* loop counters */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Check for matrix mismatch condition */
  if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
  {
    return (ARM_MATH_SIZE_MISMATCH);
  }
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  /* Bounded loops: nothing is read or written when a dimension is zero */
  for (row = 0U; row < nRows; row++)
  {
    /* Source row 'row' becomes output column 'row' */
    px = pOut + CMPLX_DIM * row;

    for (col = 0U; col < nColumns; col++)
    {
      px[0] = *pIn++; /* real */
      px[1] = *pIn++; /* imag */

      /* Next row of the transposed matrix (it has nRows columns) */
      px += CMPLX_DIM * nRows;
    }
  }

  return (ARM_MATH_SUCCESS);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of MatrixTrans group
*/

@ -0,0 +1,133 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mat_cmplx_trans_q15.c
* Description: Q15 complex matrix transpose
*
* $Date: 08. July 2020
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMatrix
*/
/**
@defgroup MatrixTrans Matrix Transpose
Transposes a matrix.
Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
\image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
*/
/**
@addtogroup MatrixTrans
@{
*/
/**
@brief Q15 complex matrix transpose.
@param[in] pSrc points to input matrix
@param[out] pDst points to output matrix
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
arm_status arm_mat_cmplx_trans_q15(const arm_matrix_instance_q15 * pSrc, arm_matrix_instance_q15 * pDst)
{
    /*
     * The transpose only moves data, so the q15 samples are handed to the
     * generic 16-bit helper as plain 16-bit values.
     */
    uint16_t *srcHalfwords = (uint16_t *) pSrc->pData;
    uint16_t *dstHalfwords = (uint16_t *) pDst->pData;

    return arm_mat_cmplx_trans_16bit(pSrc->numRows, pSrc->numCols, srcHalfwords,
                                     pDst->numRows, pDst->numCols, dstHalfwords);
}
#else
/*
 * Scalar Q15 complex matrix transpose.
 *
 * Every element is CMPLX_DIM consecutive q15_t values (real part, then
 * imaginary part). The source is read sequentially, row by row, and each
 * element is written to the swapped (column, row) position of the output.
 *
 * A matrix with zero rows or zero columns is left untouched and reported as
 * ARM_MATH_SUCCESS.
 *
 * Returns ARM_MATH_SIZE_MISMATCH when ARM_MATH_MATRIX_CHECK is defined and
 * the dimensions of pDst are not those of pSrc swapped, ARM_MATH_SUCCESS
 * otherwise.
 */
arm_status arm_mat_cmplx_trans_q15(
  const arm_matrix_instance_q15 * pSrc,
        arm_matrix_instance_q15 * pDst)
{
  const q15_t *pSrcA = pSrc->pData;         /* input data matrix pointer */
  q15_t *pOut;                              /* write position in the output */
  uint32_t nRows = pSrc->numRows;           /* number of rows */
  uint32_t nColumns = pSrc->numCols;        /* number of columns */
  uint32_t row, col;                        /* loop counters */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Check for matrix mismatch condition */
  if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
  {
    return (ARM_MATH_SIZE_MISMATCH);
  }
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  /* Bounded loops: nothing is read or written when a dimension is zero */
  for (row = 0U; row < nRows; row++)
  {
    /* Source row 'row' becomes output column 'row' */
    pOut = pDst->pData + CMPLX_DIM * row;

    for (col = 0U; col < nColumns; col++)
    {
      pOut[0] = *pSrcA++; /* real */
      pOut[1] = *pSrcA++; /* imag */

      /* Next row of the transposed matrix (it has nRows columns) */
      pOut += CMPLX_DIM * nRows;
    }
  }

  return (ARM_MATH_SUCCESS);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
* @} end of MatrixTrans group
*/

@ -0,0 +1,136 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mat_cmplx_trans_q31.c
* Description: Q31 complex matrix transpose
*
* $Date: 08. July 2020
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
@ingroup groupMatrix
*/
/**
@defgroup MatrixTrans Matrix Transpose
Transposes a matrix.
Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
\image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
*/
/**
@addtogroup MatrixTrans
@{
*/
/**
@brief Q31 complex matrix transpose.
@param[in] pSrc points to input matrix
@param[out] pDst points to output matrix
@return execution status
- \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
arm_status arm_mat_cmplx_trans_q31(const arm_matrix_instance_q31 * pSrc, arm_matrix_instance_q31 * pDst)
{
    /*
     * The transpose only moves data, so the q31 samples are handed to the
     * generic 32-bit helper as plain 32-bit words.
     */
    uint32_t *srcWords = (uint32_t *) pSrc->pData;
    uint32_t *dstWords = (uint32_t *) pDst->pData;

    return arm_mat_cmplx_trans_32bit(pSrc->numRows, pSrc->numCols, srcWords,
                                     pDst->numRows, pDst->numCols, dstWords);
}
#else
/*
 * Scalar Q31 complex matrix transpose.
 *
 * Every element is CMPLX_DIM consecutive q31_t values (real part, then
 * imaginary part). The source is read sequentially, row by row, and each
 * element is written to the swapped (column, row) position of the output.
 *
 * A matrix with zero rows or zero columns is left untouched and reported as
 * ARM_MATH_SUCCESS.
 *
 * Returns ARM_MATH_SIZE_MISMATCH when ARM_MATH_MATRIX_CHECK is defined and
 * the dimensions of pDst are not those of pSrc swapped, ARM_MATH_SUCCESS
 * otherwise.
 */
arm_status arm_mat_cmplx_trans_q31(
  const arm_matrix_instance_q31 * pSrc,
        arm_matrix_instance_q31 * pDst)
{
  const q31_t *pIn = pSrc->pData;           /* input data matrix pointer */
  q31_t *pOut = pDst->pData;                /* output data matrix pointer */
  q31_t *px;                                /* write position in the output */
  uint32_t nRows = pSrc->numRows;           /* number of rows */
  uint32_t nColumns = pSrc->numCols;        /* number of columns */
  uint32_t row, col;                        /* loop counters */

#ifdef ARM_MATH_MATRIX_CHECK
  /* Check for matrix mismatch condition */
  if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
  {
    return (ARM_MATH_SIZE_MISMATCH);
  }
#endif /* #ifdef ARM_MATH_MATRIX_CHECK */

  /* Bounded loops: nothing is read or written when a dimension is zero */
  for (row = 0U; row < nRows; row++)
  {
    /* Source row 'row' becomes output column 'row' */
    px = pOut + CMPLX_DIM * row;

    for (col = 0U; col < nColumns; col++)
    {
      px[0] = *pIn++; /* real */
      px[1] = *pIn++; /* imag */

      /* Next row of the transposed matrix (it has nRows columns) */
      px += CMPLX_DIM * nRows;
    }
  }

  return (ARM_MATH_SUCCESS);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
* @} end of MatrixTrans group
*/

@ -602,6 +602,11 @@ def writeUnaryTests(config,format):
if format == Tools.Q7:
data1 = data1 / 4.0
data1C=randComplex(NBSAMPLES)
if format == Tools.Q7:
data1C = data1C / 4.0
data2=np.random.randn(NBSAMPLES)
data2 = Tools.normalize(data2)
@ -612,6 +617,8 @@ def writeUnaryTests(config,format):
config.writeInput(1, data1,"InputA")
config.writeInput(1, asReal(data1C),"InputAC")
config.writeInput(1, data2,"InputB")
config.writeInput(1, vecdata,"InputVec")
@ -664,6 +671,14 @@ def writeUnaryTests(config,format):
vals = vals + r
config.writeReference(1, vals,"RefTranspose")
vals = []
for (a,b) in unarySizes:
ma = np.copy(data1C[0:a*b]).reshape(a,b)
r = np.transpose(ma)
r = list(asReal(r.reshape(a*b)))
vals = vals + r
config.writeReference(1, vals,"RefTransposeC")
vals = []
for (a,b) in unarySizes:
ma = np.copy(data1[0:a*b]).reshape(a,b)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -17,6 +17,11 @@
arm_mat_trans_f32(&this->in1,&this->out);
}
/* Calls arm_mat_cmplx_trans_f32 on the in1/out members of the test object.
   The returned status is not checked here. */
void UnaryF32::test_mat_cmplx_trans_f32()
{
arm_mat_cmplx_trans_f32(&this->in1,&this->out);
}
void UnaryF32::test_mat_add_f32()
{
arm_mat_add_f32(&this->in1,&this->in1,&this->out);
@ -43,12 +48,30 @@
switch(id)
{
case TEST_MAT_VEC_MULT_F32_6:
input1.reload(UnaryF32::INPUTA_F32_ID,mgr,this->nbr*this->nbc);
vec.reload(UnaryF32::INPUTVEC1_F32_ID,mgr,this->nbc);
output.create(this->nbr,UnaryF32::OUT_F32_ID,mgr);
vecp=vec.ptr();
outp=output.ptr();
break;
case TEST_MAT_TRANS_F32_3:
input1.reload(UnaryF32::INPUTA_F32_ID,mgr,this->nbr*this->nbc);
output.create(this->nbr*this->nbc,UnaryF32::OUT_F32_ID,mgr);
this->out.numRows = this->nbc;
this->out.numCols = this->nbr;
this->out.pData = output.ptr();
break;
case TEST_MAT_CMPLX_TRANS_F32_7:
input1.reload(UnaryF32::INPUTAC_F32_ID,mgr,2*this->nbr*this->nbc);
output.create(2*this->nbr*this->nbc,UnaryF32::OUT_F32_ID,mgr);
this->out.numRows = this->nbc;
this->out.numCols = this->nbr;
this->out.pData = output.ptr();
break;
default:
input1.reload(UnaryF32::INPUTA_F32_ID,mgr,this->nbr*this->nbc);
output.create(this->nbr*this->nbc,UnaryF32::OUT_F32_ID,mgr);
this->out.numRows = this->nbr;
@ -57,7 +80,6 @@
break;
}
input1.reload(UnaryF32::INPUTA_F32_ID,mgr,this->nbr*this->nbc);

@ -12,6 +12,11 @@
arm_mat_trans_q15(&this->in1,&this->out);
}
/* Calls arm_mat_cmplx_trans_q15 on the in1/out members of the test object.
   The returned status is not checked here. */
void UnaryQ15::test_mat_cmplx_trans_q15()
{
arm_mat_cmplx_trans_q15(&this->in1,&this->out);
}
void UnaryQ15::test_mat_add_q15()
{
arm_mat_add_q15(&this->in1,&this->in1,&this->out);
@ -38,12 +43,30 @@
switch(id)
{
case TEST_MAT_VEC_MULT_Q15_5:
input1.reload(UnaryQ15::INPUTA_Q15_ID,mgr,this->nbr*this->nbc);
vec.reload(UnaryQ15::INPUTVEC1_Q15_ID,mgr,this->nbc);
output.create(this->nbr,UnaryQ15::OUT_Q15_ID,mgr);
vecp=vec.ptr();
outp=output.ptr();
break;
case TEST_MAT_TRANS_Q15_2:
input1.reload(UnaryQ15::INPUTA_Q15_ID,mgr,this->nbr*this->nbc);
output.create(this->nbr*this->nbc,UnaryQ15::OUT_Q15_ID,mgr);
this->out.numRows = this->nbc;
this->out.numCols = this->nbr;
this->out.pData = output.ptr();
break;
case TEST_MAT_CMPLX_TRANS_Q15_6:
input1.reload(UnaryQ15::INPUTAC_Q15_ID,mgr,2*this->nbr*this->nbc);
output.create(2*this->nbr*this->nbc,UnaryQ15::OUT_Q15_ID,mgr);
this->out.numRows = this->nbc;
this->out.numCols = this->nbr;
this->out.pData = output.ptr();
break;
default:
input1.reload(UnaryQ15::INPUTA_Q15_ID,mgr,this->nbr*this->nbc);
output.create(this->nbr*this->nbc,UnaryQ15::OUT_Q15_ID,mgr);
this->out.numRows = this->nbr;
@ -52,7 +75,7 @@
break;
}
input1.reload(UnaryQ15::INPUTA_Q15_ID,mgr,this->nbr*this->nbc);
this->in1.numRows = this->nbr;

@ -12,6 +12,11 @@
arm_mat_trans_q31(&this->in1,&this->out);
}
/* Calls arm_mat_cmplx_trans_q31 on the in1/out members of the test object.
   The returned status is not checked here. */
void UnaryQ31::test_mat_cmplx_trans_q31()
{
arm_mat_cmplx_trans_q31(&this->in1,&this->out);
}
void UnaryQ31::test_mat_add_q31()
{
arm_mat_add_q31(&this->in1,&this->in1,&this->out);
@ -38,12 +43,30 @@
switch(id)
{
case TEST_MAT_VEC_MULT_Q31_5:
input1.reload(UnaryQ31::INPUTA_Q31_ID,mgr,this->nbr*this->nbc);
vec.reload(UnaryQ31::INPUTVEC1_Q31_ID,mgr,this->nbc);
output.create(this->nbr,UnaryQ31::OUT_Q31_ID,mgr);
vecp=vec.ptr();
outp=output.ptr();
break;
case TEST_MAT_TRANS_Q31_2:
input1.reload(UnaryQ31::INPUTA_Q31_ID,mgr,this->nbr*this->nbc);
output.create(this->nbr*this->nbc,UnaryQ31::OUT_Q31_ID,mgr);
this->out.numRows = this->nbc;
this->out.numCols = this->nbr;
this->out.pData = output.ptr();
break;
case TEST_MAT_CMPLX_TRANS_Q31_6:
input1.reload(UnaryQ31::INPUTAC_Q31_ID,mgr,2*this->nbr*this->nbc);
output.create(2*this->nbr*this->nbc,UnaryQ31::OUT_Q31_ID,mgr);
this->out.numRows = this->nbc;
this->out.numCols = this->nbr;
this->out.pData = output.ptr();
break;
default:
input1.reload(UnaryQ31::INPUTA_Q31_ID,mgr,this->nbr*this->nbc);
output.create(this->nbr*this->nbc,UnaryQ31::OUT_Q31_ID,mgr);
this->out.numRows = this->nbr;
@ -52,7 +75,6 @@
break;
}
input1.reload(UnaryQ31::INPUTA_Q31_ID,mgr,this->nbr*this->nbc);
this->in1.numRows = this->nbr;

@ -86,6 +86,24 @@ But big matrix needed for checking the vectorized code */
} \
out.pData = outp;
/* Set up the in1 and out matrix instances for one complex test case.
   - in1 gets rows x columns and points to ap, which is first filled with
     2*rows*columns float32_t values copied from inp1 (two values per
     complex element).
   - out points to outp; its dimensions are columns x rows when TRANSPOSED
     is true and rows x columns otherwise.
   The macro relies on rows, columns, ap, inp1, outp, in1 and out being in
   scope where it is expanded. */
#define PREPAREDATA1C(TRANSPOSED) \
in1.numRows=rows; \
in1.numCols=columns; \
memcpy((void*)ap,(const void*)inp1,2*sizeof(float32_t)*rows*columns);\
in1.pData = ap; \
\
if (TRANSPOSED) \
{ \
out.numRows=columns; \
out.numCols=rows; \
} \
else \
{ \
out.numRows=rows; \
out.numCols=columns; \
} \
out.pData = outp;
#define LOADVECDATA2() \
const float32_t *inp1=input1.ptr(); \
const float32_t *inp2=input2.ptr(); \
@ -234,6 +252,31 @@ void UnaryTestsF32::test_mat_trans_f32()
}
/* Runs arm_mat_cmplx_trans_f32 once per (rows, columns) pair read from dimsp,
   writing the results one after the other through outp, then checks output
   against ref. */
void UnaryTestsF32::test_mat_cmplx_trans_f32()
{
    /* LOADDATA1 is defined outside this view; presumably it declares i,
       nbMatrixes, dimsp, rows, columns, inp1, ap and outp -- TODO confirm. */
    LOADDATA1();
    for(i=0;i < nbMatrixes ; i ++)
    {
        rows = *dimsp++;
        columns = *dimsp++;
        /* true: the output instance gets the swapped dimensions */
        PREPAREDATA1C(true);
        arm_mat_cmplx_trans_f32(&this->in1,&this->out);
        /* Skip the result just written: two values per complex element */
        outp += 2*(rows * columns);
    }
    ASSERT_EMPTY_TAIL(output);
    ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
    ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
}
void UnaryTestsF32::test_mat_inverse_f32()
{
const float32_t *inp1=input1.ptr();
@ -342,6 +385,16 @@ void UnaryTestsF32::test_mat_inverse_f32()
a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
b.create(MAXMATRIXDIM,UnaryTestsF32::TMPB_F32_ID,mgr);
break;
case TEST_MAT_CMPLX_TRANS_F32_7:
input1.reload(UnaryTestsF32::INPUTSC1_F32_ID,mgr);
dims.reload(UnaryTestsF32::DIMSUNARY1_S16_ID,mgr);
ref.reload(UnaryTestsF32::REFTRANSC1_F32_ID,mgr);
output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr);
a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
break;
}

@ -75,6 +75,24 @@ a double precision computation.
} \
out.pData = outp;
/* Set up the in1 and out matrix instances for one complex test case.
   - in1 gets rows x columns and points to ap, which is first filled with
     2*rows*columns q15_t values copied from inp1 (two values per complex
     element).
   - out points to outp; its dimensions are columns x rows when TRANSPOSED
     is true and rows x columns otherwise.
   The macro relies on rows, columns, ap, inp1, outp, in1 and out being in
   scope where it is expanded. */
#define PREPAREDATA1C(TRANSPOSED) \
in1.numRows=rows; \
in1.numCols=columns; \
memcpy((void*)ap,(const void*)inp1,2*sizeof(q15_t)*rows*columns);\
in1.pData = ap; \
\
if (TRANSPOSED) \
{ \
out.numRows=columns; \
out.numCols=rows; \
} \
else \
{ \
out.numRows=rows; \
out.numCols=columns; \
} \
out.pData = outp;
#define LOADVECDATA2() \
const q15_t *inp1=input1.ptr(); \
const q15_t *inp2=input2.ptr(); \
@ -222,6 +240,31 @@ void UnaryTestsQ15::test_mat_trans_q15()
}
/* Runs arm_mat_cmplx_trans_q15 once per (rows, columns) pair read from dimsp,
   writing the results one after the other through outp, then checks output
   against ref. */
void UnaryTestsQ15::test_mat_cmplx_trans_q15()
{
    /* LOADDATA1 is defined outside this view; presumably it declares i,
       nbMatrixes, dimsp, rows, columns, inp1, ap and outp -- TODO confirm. */
    LOADDATA1();
    for(i=0;i < nbMatrixes ; i ++)
    {
        rows = *dimsp++;
        columns = *dimsp++;
        /* true: the output instance gets the swapped dimensions */
        PREPAREDATA1C(true);
        arm_mat_cmplx_trans_q15(&this->in1,&this->out);
        /* Skip the result just written: two values per complex element */
        outp += 2*(rows * columns);
    }
    ASSERT_EMPTY_TAIL(output);
    /* NOTE(review): the threshold is cast to q15_t here, whereas the f32
       variant of this test casts it to float32_t -- confirm this is the
       type ASSERT_SNR expects. */
    ASSERT_SNR(output,ref,(q15_t)SNR_THRESHOLD);
    ASSERT_NEAR_EQ(output,ref,ABS_ERROR_Q15);
}
void UnaryTestsQ15::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
{
@ -286,6 +329,16 @@ void UnaryTestsQ15::test_mat_trans_q15()
b.create(MAXMATRIXDIM,UnaryTestsQ15::TMPB_Q15_ID,mgr);
break;
case TEST_MAT_CMPLX_TRANS_Q15_6:
input1.reload(UnaryTestsQ15::INPUTSC1_Q15_ID,mgr);
dims.reload(UnaryTestsQ15::DIMSUNARY1_S16_ID,mgr);
ref.reload(UnaryTestsQ15::REFTRANSC1_Q15_ID,mgr);
output.create(ref.nbSamples(),UnaryTestsQ15::OUT_Q15_ID,mgr);
a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsQ15::TMPA_Q15_ID,mgr);
break;
}

@ -75,6 +75,24 @@ a double precision computation.
} \
out.pData = outp;
/* Set up the in1 and out matrix instances for one complex test case.
   - in1 gets rows x columns and points to ap, which is first filled with
     2*rows*columns q31_t values copied from inp1 (two values per complex
     element).
   - out points to outp; its dimensions are columns x rows when TRANSPOSED
     is true and rows x columns otherwise.
   The macro relies on rows, columns, ap, inp1, outp, in1 and out being in
   scope where it is expanded. */
#define PREPAREDATA1C(TRANSPOSED) \
in1.numRows=rows; \
in1.numCols=columns; \
memcpy((void*)ap,(const void*)inp1,2*sizeof(q31_t)*rows*columns);\
in1.pData = ap; \
\
if (TRANSPOSED) \
{ \
out.numRows=columns; \
out.numCols=rows; \
} \
else \
{ \
out.numRows=rows; \
out.numCols=columns; \
} \
out.pData = outp;
#define LOADVECDATA2() \
const q31_t *inp1=input1.ptr(); \
const q31_t *inp2=input2.ptr(); \
@ -222,6 +240,31 @@ void UnaryTestsQ31::test_mat_trans_q31()
}
/* Runs arm_mat_cmplx_trans_q31 once per (rows, columns) pair read from dimsp,
   writing the results one after the other through outp, then checks output
   against ref. */
void UnaryTestsQ31::test_mat_cmplx_trans_q31()
{
    /* LOADDATA1 is defined outside this view; presumably it declares i,
       nbMatrixes, dimsp, rows, columns, inp1, ap and outp -- TODO confirm. */
    LOADDATA1();
    for(i=0;i < nbMatrixes ; i ++)
    {
        rows = *dimsp++;
        columns = *dimsp++;
        /* true: the output instance gets the swapped dimensions */
        PREPAREDATA1C(true);
        arm_mat_cmplx_trans_q31(&this->in1,&this->out);
        /* Skip the result just written: two values per complex element */
        outp += 2*(rows * columns);
    }
    ASSERT_EMPTY_TAIL(output);
    /* NOTE(review): the threshold is cast to q31_t here, whereas the f32
       variant of this test casts it to float32_t -- confirm this is the
       type ASSERT_SNR expects. */
    ASSERT_SNR(output,ref,(q31_t)SNR_THRESHOLD);
    ASSERT_NEAR_EQ(output,ref,ABS_ERROR_Q31);
}
void UnaryTestsQ31::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
{
@ -286,6 +329,16 @@ void UnaryTestsQ31::test_mat_trans_q31()
b.create(MAXMATRIXDIM,UnaryTestsQ31::TMPB_Q31_ID,mgr);
break;
case TEST_MAT_CMPLX_TRANS_Q31_6:
input1.reload(UnaryTestsQ31::INPUTSC1_Q31_ID,mgr);
dims.reload(UnaryTestsQ31::DIMSUNARY1_S16_ID,mgr);
ref.reload(UnaryTestsQ31::REFTRANSC1_Q31_ID,mgr);
output.create(ref.nbSamples(),UnaryTestsQ31::OUT_Q31_ID,mgr);
a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsQ31::TMPA_Q31_ID,mgr);
break;
}

@ -923,6 +923,7 @@ group Root {
}
Pattern INPUTA_F32_ID : InputA1_f32.txt
Pattern INPUTAC_F32_ID : InputAC1_f32.txt
Pattern INPUTVEC1_F32_ID : InputVec1_f32.txt
Output OUT_F32_ID : Output
@ -939,6 +940,7 @@ group Root {
Matrix Addition:test_mat_add_f32
Matrix Substraction:test_mat_sub_f32
Matrix Vector Multiplication:test_mat_vec_mult_f32
Matrix Complex Transpose:test_mat_cmplx_trans_f32
} -> PARAM1_ID
}
@ -954,6 +956,7 @@ group Root {
}
Pattern INPUTA_Q31_ID : InputA1_q31.txt
Pattern INPUTAC_Q31_ID : InputAC1_q31.txt
Pattern INPUTVEC1_Q31_ID : InputVec1_q31.txt
Output OUT_Q31_ID : Output
@ -969,6 +972,7 @@ group Root {
Matrix Addition:test_mat_add_q31
Matrix Substraction:test_mat_sub_q31
Matrix Vector Multiplication:test_mat_vec_mult_q31
Matrix Complex Transpose:test_mat_cmplx_trans_q31
} -> PARAM1_ID
}
@ -984,6 +988,7 @@ group Root {
}
Pattern INPUTA_Q15_ID : InputA1_q15.txt
Pattern INPUTAC_Q15_ID : InputAC1_q15.txt
Pattern INPUTVEC1_Q15_ID : InputVec1_q15.txt
Output OUT_Q15_ID : Output
@ -998,6 +1003,7 @@ group Root {
Matrix Addition:test_mat_add_q15
Matrix Substraction:test_mat_sub_q15
Matrix Vector Multiplication:test_mat_vec_mult_q15
Matrix Complex Transpose:test_mat_cmplx_trans_q15
} -> PARAM1_ID
}

@ -2284,6 +2284,8 @@ group Root {
folder = UnaryF32
Pattern INPUTS1_F32_ID : InputA1_f32.txt
Pattern INPUTSC1_F32_ID : InputAC1_f32.txt
Pattern INPUTS2_F32_ID : InputB1_f32.txt
Pattern INPUTVEC1_F32_ID : InputVec1_f32.txt
@ -2295,6 +2297,7 @@ group Root {
Pattern REFSUB1_F32_ID : RefSub1_f32.txt
Pattern REFSCALE1_F32_ID : RefScale1_f32.txt
Pattern REFTRANS1_F32_ID : RefTranspose1_f32.txt
Pattern REFTRANSC1_F32_ID : RefTransposeC1_f32.txt
Pattern REFINV1_F32_ID : RefInvert1_f32.txt
Pattern REFVECMUL1_F32_ID : RefVecMul1_f32.txt
@ -2310,6 +2313,7 @@ group Root {
test matrix transpose:test_mat_trans_f32
test matrix inverse:test_mat_inverse_f32
test mat mult vec:test_mat_vec_mult_f32
test matrix complex transpose:test_mat_cmplx_trans_f32
}
}
@ -2319,6 +2323,7 @@ group Root {
folder = UnaryQ31
Pattern INPUTS1_Q31_ID : InputA1_q31.txt
Pattern INPUTSC1_Q31_ID : InputAC1_q31.txt
Pattern INPUTS2_Q31_ID : InputB1_q31.txt
Pattern INPUTVEC1_Q31_ID : InputVec1_q31.txt
@ -2330,6 +2335,7 @@ group Root {
Pattern REFSUB1_Q31_ID : RefSub1_q31.txt
Pattern REFSCALE1_Q31_ID : RefScale1_q31.txt
Pattern REFTRANS1_Q31_ID : RefTranspose1_q31.txt
Pattern REFTRANSC1_Q31_ID : RefTransposeC1_q31.txt
Pattern REFINV1_Q31_ID : RefInvert1_q31.txt
Pattern REFVECMUL1_Q31_ID : RefVecMul1_q31.txt
@ -2343,6 +2349,7 @@ group Root {
test matrix scale:test_mat_scale_q31
test matrix transpose:test_mat_trans_q31
test mat mult vec:test_mat_vec_mult_q31
test matrix complex transpose:test_mat_cmplx_trans_q31
}
}
@ -2352,6 +2359,7 @@ group Root {
folder = UnaryQ15
Pattern INPUTS1_Q15_ID : InputA1_q15.txt
Pattern INPUTSC1_Q15_ID : InputAC1_q15.txt
Pattern INPUTS2_Q15_ID : InputB1_q15.txt
Pattern INPUTVEC1_Q15_ID : InputVec1_q15.txt
@ -2363,6 +2371,7 @@ group Root {
Pattern REFSUB1_Q15_ID : RefSub1_q15.txt
Pattern REFSCALE1_Q15_ID : RefScale1_q15.txt
Pattern REFTRANS1_Q15_ID : RefTranspose1_q15.txt
Pattern REFTRANSC1_Q15_ID : RefTransposeC1_q15.txt
Pattern REFINV1_Q15_ID : RefInvert1_q15.txt
Pattern REFVECMUL1_Q15_ID : RefVecMul1_q15.txt
@ -2376,6 +2385,7 @@ group Root {
test matrix scale:test_mat_scale_q15
test matrix transpose:test_mat_trans_q15
test mat mult vec:test_mat_vec_mult_q15
test matrix complex transpose:test_mat_cmplx_trans_q15
}
}
@ -2385,6 +2395,7 @@ group Root {
folder = UnaryQ7
Pattern INPUTS1_Q7_ID : InputA1_q7.txt
Pattern INPUTSC1_Q7_ID : InputAC1_q7.txt
Pattern INPUTS2_Q7_ID : InputB1_q7.txt
Pattern INPUTVEC1_Q7_ID : InputVec1_q7.txt
@ -2396,6 +2407,7 @@ group Root {
Pattern REFSUB1_Q7_ID : RefSub1_q7.txt
Pattern REFSCALE1_Q7_ID : RefScale1_q7.txt
Pattern REFTRANS1_Q7_ID : RefTranspose1_q7.txt
Pattern REFTRANSC1_Q7_ID : RefTransposeC1_q7.txt
Pattern REFINV1_Q7_ID : RefInvert1_q7.txt
Pattern REFVECMUL1_Q7_ID : RefVecMul1_q7.txt

Loading…
Cancel
Save