CMSIS-DSP: Added new matrix functions and their MVE implementation

arm_mat_cmplx_trans_f32.c arm_mat_cmplx_trans_q15.c arm_mat_cmplx_trans_q31.c
6 years ago · a20e215bfc
parent 8268b079d5
commit a20e215bfc
26 changed files with 110693 additions and 6 deletions
--- a/Include/arm_helium_utils.h
+++ b/Include/arm_helium_utils.h
@ -231,6 +231,172 @@ __STATIC_INLINE arm_status arm_mat_trans_32bit_generic_mve(
    return (ARM_MATH_SUCCESS);
 }

+/**
+ * @brief Transpose a complex matrix made of 32-bit words using MVE gather loads.
+ *
+ * Each complex element is read as adjacent 32-bit words (real part first).
+ * The words are only moved, never interpreted, so the routine can serve any
+ * 32-bit element type. The offset set-up below assumes CMPLX_DIM == 2.
+ *
+ * @param[in]  srcRows    number of rows of the source matrix
+ * @param[in]  srcCols    number of columns of the source matrix
+ * @param[in]  pDataSrc   source data (srcRows x srcCols complex elements)
+ * @param[in]  dstRows    number of rows of the destination; only examined when
+ *                        ARM_MATH_MATRIX_CHECK is defined
+ * @param[in]  dstCols    number of columns of the destination; only examined
+ *                        when ARM_MATH_MATRIX_CHECK is defined
+ * @param[out] pDataDest  destination data (srcCols x srcRows complex elements)
+ * @return     ARM_MATH_SIZE_MISMATCH when the size check is enabled and fails,
+ *             ARM_MATH_SUCCESS otherwise
+ */
+__STATIC_INLINE arm_status arm_mat_cmplx_trans_32bit(
+    uint16_t    srcRows,
+    uint16_t    srcCols,
+    uint32_t   *pDataSrc,
+    uint16_t    dstRows,
+    uint16_t    dstCols,
+    uint32_t   *pDataDest)
+{
+    uint32_t        i;
+    uint32_t const *pDataC;
+    uint32_t       *pDataRow;
+    uint32_t       *pDataDestR, *pDataDestRow;
+    uint32x4_t      vecOffsRef, vecOffsCur;
+    uint32_t        blkCnt;
+    uint32x4_t      vecIn;
+
+#ifdef ARM_MATH_MATRIX_CHECK
+    /*
+     * Check for matrix mismatch condition
+     */
+    if ((srcRows != dstCols) || (srcCols != dstRows))
+    {
+        return (ARM_MATH_SIZE_MISMATCH);
+    }
+#else
+    (void)dstRows;
+    (void)dstCols;
+#endif
+
+    /* 2x2, 3x3 and 4x4 specialization to be added */
+
+    /*
+     * Word offsets of two vertically adjacent complex elements:
+     * [re(r, c), im(r, c), re(r + 1, c), im(r + 1, c)]
+     */
+    vecOffsRef[0] = 0;
+    vecOffsRef[1] = 1;
+    vecOffsRef[2] = srcCols << 1;
+    vecOffsRef[3] = (srcCols << 1) + 1;
+
+    pDataRow = pDataSrc;
+    pDataDestRow = pDataDest;
+
+    /*
+     * One pass per source column; each pass fills one destination row.
+     * The loop is pre-tested so that a matrix with zero columns is a no-op
+     * instead of wrapping the counter.
+     */
+    for (i = srcCols; i > 0U; i--)
+    {
+        pDataC = (uint32_t const *) pDataRow;
+        pDataDestR = pDataDestRow;
+        vecOffsCur = vecOffsRef;
+
+        /* Full vectors: 4 words (two complex elements) per iteration */
+        blkCnt = (srcRows * CMPLX_DIM) >> 2;
+        while (blkCnt > 0U)
+        {
+            vecIn = vldrwq_gather_shifted_offset(pDataC, vecOffsCur);
+            vstrwq(pDataDestR, vecIn);
+            pDataDestR += 4;
+            /* Advance the gather offsets by two source rows */
+            vecOffsCur = vaddq(vecOffsCur, (srcCols << 2));
+            blkCnt--;
+        }
+
+        /*
+         * Tail: the load is predicated like the store so that lanes
+         * belonging to rows beyond the source matrix are never read.
+         */
+        blkCnt = (srcRows * CMPLX_DIM) & 3;
+        if (blkCnt > 0U)
+        {
+            mve_pred16_t p0 = vctp32q(blkCnt);
+            vecIn = vldrwq_gather_shifted_offset_z(pDataC, vecOffsCur, p0);
+            vstrwq_p(pDataDestR, vecIn, p0);
+        }
+
+        /* Next source column / next destination row */
+        pDataRow += CMPLX_DIM;
+        pDataDestRow += (srcRows * CMPLX_DIM);
+    }
+
+    return (ARM_MATH_SUCCESS);
+}
+
+/**
+ * @brief Transpose a complex matrix made of 16-bit words using MVE gather loads.
+ *
+ * Each complex element is read as adjacent 16-bit words (real part first).
+ * The words are only moved, never interpreted. The gather offsets are held in
+ * 16-bit lanes, so the source must be small enough for every word offset to
+ * fit in a uint16_t. The offset set-up below assumes CMPLX_DIM == 2.
+ *
+ * @param[in]  srcRows    number of rows of the source matrix
+ * @param[in]  srcCols    number of columns of the source matrix
+ * @param[in]  pDataSrc   source data (srcRows x srcCols complex elements)
+ * @param[in]  dstRows    number of rows of the destination; only examined when
+ *                        ARM_MATH_MATRIX_CHECK is defined
+ * @param[in]  dstCols    number of columns of the destination; only examined
+ *                        when ARM_MATH_MATRIX_CHECK is defined
+ * @param[out] pDataDest  destination data (srcCols x srcRows complex elements)
+ * @return     ARM_MATH_SIZE_MISMATCH when the size check is enabled and fails,
+ *             ARM_MATH_SUCCESS otherwise
+ */
+__STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit(
+    uint16_t    srcRows,
+    uint16_t    srcCols,
+    uint16_t   *pDataSrc,
+    uint16_t    dstRows,
+    uint16_t    dstCols,
+    uint16_t   *pDataDest)
+{
+    static const uint16_t loadCmplxCol[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
+    int             i;
+    uint16x8_t    vecOffsRef, vecOffsCur;
+    uint16_t const *pDataC;
+    uint16_t       *pDataRow;
+    uint16_t       *pDataDestR, *pDataDestRow;
+    uint32_t        blkCnt;
+    uint16x8_t    vecIn;
+
+#ifdef ARM_MATH_MATRIX_CHECK
+    /*
+     * Check for matrix mismatch condition
+     */
+    if ((srcRows != dstCols) || (srcCols != dstRows))
+    {
+        return (ARM_MATH_SIZE_MISMATCH);
+    }
+#else
+    (void)dstRows;
+    (void)dstCols;
+#endif
+
+    /*
+     * 2x2, 3x3 and 4x4 specialization to be added
+     */
+
+    /*
+     * build  [0, 1, 2xcol, 2xcol+1, 4xcol, 4xcol+1, 6xcol, 6xcol+1]
+     * i.e. the word offsets of four vertically adjacent complex elements
+     */
+    vecOffsRef = vldrhq_u16((uint16_t const *) loadCmplxCol);
+    vecOffsRef = vmulq(vecOffsRef, (uint16_t) (srcCols * CMPLX_DIM))
+                    + viwdupq_u16((uint32_t)0, (uint16_t) 2, 1);
+
+    pDataRow = pDataSrc;
+    pDataDestRow = pDataDest;
+
+    /*
+     * One pass per source column; each pass fills one destination row.
+     * The loop is pre-tested so that a matrix with zero columns is a no-op
+     * instead of running the counter below zero.
+     */
+    for (i = srcCols; i > 0; i--)
+    {
+        pDataC = (uint16_t const *) pDataRow;
+        pDataDestR = pDataDestRow;
+        vecOffsCur = vecOffsRef;
+
+        /* Full vectors: 8 words (four complex elements) per iteration */
+        blkCnt = (srcRows * CMPLX_DIM) >> 3;
+        while (blkCnt > 0U)
+        {
+            vecIn = vldrhq_gather_shifted_offset(pDataC, vecOffsCur);
+            vstrhq(pDataDestR, vecIn);
+            pDataDestR += 8; /* VEC_LANES_U16 */
+            /* Advance the gather offsets by four source rows */
+            vecOffsCur = vaddq(vecOffsCur, (srcCols << 3));
+            blkCnt--;
+        }
+
+        /*
+         * Tail: the load is predicated like the store so that lanes
+         * belonging to rows beyond the source matrix are never read.
+         */
+        blkCnt = (srcRows * CMPLX_DIM) & 0x7;
+        if (blkCnt > 0U)
+        {
+            mve_pred16_t p0 = vctp16q(blkCnt);
+            vecIn = vldrhq_gather_shifted_offset_z(pDataC, vecOffsCur, p0);
+            vstrhq_p(pDataDestR, vecIn, p0);
+        }
+
+        /* Next source column / next destination row */
+        pDataRow += CMPLX_DIM;
+        pDataDestRow += (srcRows * CMPLX_DIM);
+    }
+
+    return (ARM_MATH_SUCCESS);
+}
+
 #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE)
 __STATIC_INLINE q31x4_t FAST_VSQRT_Q31(q31x4_t vecIn)
 {
--- a/Include/arm_math.h
+++ b/Include/arm_math.h
@ -2502,6 +2502,18 @@ arm_status arm_mat_trans_f32(
  const arm_matrix_instance_f32 * pSrc,
        arm_matrix_instance_f32 * pDst);

+  /**
+   * @brief Floating-point complex matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
+   * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+arm_status arm_mat_cmplx_trans_f32(
+  const arm_matrix_instance_f32 * pSrc,
+  arm_matrix_instance_f32 * pDst);
+
+
  /**
   * @brief Q15 matrix transpose.
   * @param[in]  pSrc  points to the input matrix
@ -2513,6 +2525,17 @@ arm_status arm_mat_trans_q15(
  const arm_matrix_instance_q15 * pSrc,
        arm_matrix_instance_q15 * pDst);

+  /**
+   * @brief Q15 complex matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
+   * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+arm_status arm_mat_cmplx_trans_q15(
+  const arm_matrix_instance_q15 * pSrc,
+  arm_matrix_instance_q15 * pDst);
+
  /**
   * @brief Q7 matrix transpose.
   * @param[in]  pSrc  points to the input matrix
@ -2535,6 +2558,17 @@ arm_status arm_mat_trans_q31(
  const arm_matrix_instance_q31 * pSrc,
        arm_matrix_instance_q31 * pDst);

+  /**
+   * @brief Q31 complex matrix transpose.
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
+   * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+arm_status arm_mat_cmplx_trans_q31(
+  const arm_matrix_instance_q31 * pSrc,
+  arm_matrix_instance_q31 * pDst);
+
  /**
   * @brief Floating-point matrix multiplication
   * @param[in]  pSrcA  points to the first input matrix structure
--- a/Source/MatrixFunctions/CMakeLists.txt
+++ b/Source/MatrixFunctions/CMakeLists.txt
@ -5,9 +5,31 @@ project(CMSISDSPMatrix)
 include(configLib)
 include(configDsp)

-file(GLOB SRC "./*_*.c")
-
-add_library(CMSISDSPMatrix STATIC ${SRC})
+file(GLOB SRCF64 "./*_f64.c")
+file(GLOB SRCF32 "./*_f32.c")
+file(GLOB SRCF16 "./*_f16.c")
+file(GLOB SRCQ31 "./*_q31.c")
+file(GLOB SRCQ15 "./*_q15.c")
+file(GLOB SRCQ7  "./*_q7.c")
+
+file(GLOB SRCU32 "./*_u32.c")
+file(GLOB SRCU16 "./*_u16.c")
+file(GLOB SRCU8  "./*_u8.c")
+
+add_library(CMSISDSPMatrix STATIC ${SRCF64})
+target_sources(CMSISDSPMatrix PRIVATE ${SRCF32})
+
+if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
+target_sources(CMSISDSPMatrix PRIVATE ${SRCF16})
+endif()
+
+target_sources(CMSISDSPMatrix PRIVATE ${SRCQ31})
+target_sources(CMSISDSPMatrix PRIVATE ${SRCQ15})
+target_sources(CMSISDSPMatrix PRIVATE ${SRCQ7})
+
+target_sources(CMSISDSPMatrix PRIVATE ${SRCU32})
+target_sources(CMSISDSPMatrix PRIVATE ${SRCU16})
+target_sources(CMSISDSPMatrix PRIVATE ${SRCU8})

 configLib(CMSISDSPMatrix ${ROOT})
 configDsp(CMSISDSPMatrix ${ROOT})
--- a/Source/MatrixFunctions/MatrixFunctions.c
+++ b/Source/MatrixFunctions/MatrixFunctions.c
@ -57,3 +57,7 @@
 #include "arm_mat_vec_mult_q31.c"
 #include "arm_mat_vec_mult_q15.c"
 #include "arm_mat_vec_mult_q7.c"
+#include "arm_mat_cmplx_trans_f32.c"
+#include "arm_mat_cmplx_trans_q31.c"
+#include "arm_mat_cmplx_trans_q15.c"
+
--- a/Source/MatrixFunctions/arm_mat_cmplx_trans_f32.c
+++ b/Source/MatrixFunctions/arm_mat_cmplx_trans_f32.c
@ -0,0 +1,133 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mat_cmplx_trans_f32.c
+ * Description:  Floating-point complex matrix transpose
+ *
+ * $Date:        08. July 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+  @ingroup groupMatrix
+ */
+
+/**
+  @defgroup MatrixTrans Matrix Transpose
+
+  Transposes a matrix.
+
+  Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
+  \image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
+ */
+
+/**
+  @addtogroup MatrixTrans
+  @{
+ */
+
+/**
+  @brief         Floating-point complex matrix transpose.
+  @param[in]     pSrc      points to input matrix
+  @param[out]    pDst      points to output matrix
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+/* MVE build: the float data is handed to the generic 32-bit word transpose. */
+arm_status arm_mat_cmplx_trans_f32(
+    const arm_matrix_instance_f32 * pSrc,
+    arm_matrix_instance_f32 * pDst)
+{
+    uint32_t *pSrcWords = (uint32_t *) pSrc->pData;
+    uint32_t *pDstWords = (uint32_t *) pDst->pData;
+
+    return arm_mat_cmplx_trans_32bit(pSrc->numRows, pSrc->numCols, pSrcWords,
+                                     pDst->numRows, pDst->numCols, pDstWords);
+}
+
+#else
+/* Scalar implementation: copies interleaved (real, imag) pairs one at a time. */
+arm_status arm_mat_cmplx_trans_f32(
+  const arm_matrix_instance_f32 * pSrc,
+  arm_matrix_instance_f32 * pDst)
+{
+  float32_t *pIn = pSrc->pData;                  /* input data matrix pointer */
+  float32_t *pOut = pDst->pData;                 /* output data matrix pointer */
+  float32_t *px;                                 /* Temporary output data matrix pointer */
+  uint16_t nRows = pSrc->numRows;                /* number of rows */
+  uint16_t nColumns = pSrc->numCols;             /* number of columns */
+  uint16_t col, row;                             /* loop counters */
+  arm_status status;                             /* status of matrix transpose  */
+
+
+#ifdef ARM_MATH_MATRIX_CHECK
+
+  /* Check for matrix mismatch condition */
+  if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif /*      #ifdef ARM_MATH_MATRIX_CHECK    */
+
+  {
+    /* Matrix transpose by exchanging the rows with columns.
+       The loops are pre-tested so that a matrix with zero rows or zero
+       columns is left untouched instead of underflowing the row counter. */
+    for (row = 0U; row < nRows; row++)
+    {
+      /* Source row 'row' becomes destination column 'row' */
+      px = pOut + CMPLX_DIM * row;
+
+      for (col = 0U; col < nColumns; col++)
+      {
+        /* Read and store the input element in the destination */
+        px[0] = *pIn++; /* real */
+        px[1] = *pIn++; /* imag */
+
+        /* Move to the next row of the transposed matrix */
+        px += CMPLX_DIM * nRows;
+      }
+    }
+
+    /* Set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to application */
+  return (status);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+ * @} end of MatrixTrans group
+ */
--- a/Source/MatrixFunctions/arm_mat_cmplx_trans_q15.c
+++ b/Source/MatrixFunctions/arm_mat_cmplx_trans_q15.c
@ -0,0 +1,133 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mat_cmplx_trans_q15.c
+ * Description:  Q15 complex matrix transpose
+ *
+ * $Date:        08. July 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+  @ingroup groupMatrix
+ */
+
+/**
+  @defgroup MatrixTrans Matrix Transpose
+
+  Transposes a matrix.
+
+  Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
+  \image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
+ */
+
+/**
+  @addtogroup MatrixTrans
+  @{
+ */
+
+/**
+  @brief         Q15 complex matrix transpose.
+  @param[in]     pSrc      points to input matrix
+  @param[out]    pDst      points to output matrix
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+ */
+#if defined(ARM_MATH_MVEI)
+
+#include "arm_helium_utils.h"
+
+/* MVE build: the Q15 data is handed to the generic 16-bit word transpose. */
+arm_status arm_mat_cmplx_trans_q15(
+    const arm_matrix_instance_q15 * pSrc,
+    arm_matrix_instance_q15 * pDst)
+{
+    uint16_t *pSrcWords = (uint16_t *) pSrc->pData;
+    uint16_t *pDstWords = (uint16_t *) pDst->pData;
+
+    return arm_mat_cmplx_trans_16bit(pSrc->numRows, pSrc->numCols, pSrcWords,
+                                     pDst->numRows, pDst->numCols, pDstWords);
+}
+
+
+#else
+/* Scalar implementation: copies interleaved (real, imag) pairs one at a time. */
+arm_status arm_mat_cmplx_trans_q15(
+  const arm_matrix_instance_q15 * pSrc,
+  arm_matrix_instance_q15 * pDst)
+{
+  q15_t *pSrcA = pSrc->pData;                    /* input data matrix pointer */
+  q15_t *pOut = pDst->pData;                     /* output data matrix pointer */
+  uint16_t nRows = pSrc->numRows;                /* number of rows */
+  uint16_t nColumns = pSrc->numCols;             /* number of columns */
+  uint16_t col, row;                             /* row and column loop counters */
+  arm_status status;                             /* status of matrix transpose */
+
+
+#ifdef ARM_MATH_MATRIX_CHECK
+
+  /* Check for matrix mismatch condition */
+  if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
+
+  {
+    /* Matrix transpose by exchanging the rows with columns.
+       The loops are pre-tested so that a matrix with zero rows or zero
+       columns is left untouched instead of underflowing the row counter. */
+    for (row = 0U; row < nRows; row++)
+    {
+      /* Source row 'row' becomes destination column 'row' */
+      pOut = pDst->pData + CMPLX_DIM * row;
+
+      for (col = 0U; col < nColumns; col++)
+      {
+        /* Read and store the input element in the destination */
+        pOut[0] = *pSrcA++; /* real */
+        pOut[1] = *pSrcA++; /* imag */
+
+        /* Move to the next row of the transposed matrix */
+        pOut += CMPLX_DIM * nRows;
+      }
+    }
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+  /* Return to application */
+  return (status);
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+ * @} end of MatrixTrans group
+ */
--- a/Source/MatrixFunctions/arm_mat_cmplx_trans_q31.c
+++ b/Source/MatrixFunctions/arm_mat_cmplx_trans_q31.c
@ -0,0 +1,136 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mat_cmplx_trans_q31.c
+ * Description:  Q31 complex matrix transpose
+ *
+ * $Date:        08. July 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+  @ingroup groupMatrix
+ */
+
+/**
+  @defgroup MatrixTrans Matrix Transpose
+
+  Transposes a matrix.
+
+  Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.
+  \image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"
+ */
+
+/**
+  @addtogroup MatrixTrans
+  @{
+ */
+
+/**
+  @brief         Q31 complex matrix transpose.
+  @param[in]     pSrc      points to input matrix
+  @param[out]    pDst      points to output matrix
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS       : Operation successful
+                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
+ */
+#if defined(ARM_MATH_MVEI)
+
+#include "arm_helium_utils.h"
+
+
+/* MVE build: the Q31 data is handed to the generic 32-bit word transpose. */
+arm_status arm_mat_cmplx_trans_q31(
+    const arm_matrix_instance_q31 * pSrc,
+    arm_matrix_instance_q31 * pDst)
+{
+    uint32_t *pSrcWords = (uint32_t *) pSrc->pData;
+    uint32_t *pDstWords = (uint32_t *) pDst->pData;
+
+    return arm_mat_cmplx_trans_32bit(pSrc->numRows, pSrc->numCols, pSrcWords,
+                                     pDst->numRows, pDst->numCols, pDstWords);
+}
+
+
+#else
+/* Scalar implementation: copies interleaved (real, imag) pairs one at a time. */
+arm_status arm_mat_cmplx_trans_q31(
+  const arm_matrix_instance_q31 * pSrc,
+  arm_matrix_instance_q31 * pDst)
+{
+  q31_t *pIn = pSrc->pData;                      /* input data matrix pointer  */
+  q31_t *pOut = pDst->pData;                     /* output data matrix pointer  */
+  q31_t *px;                                     /* Temporary output data matrix pointer */
+  uint16_t nRows = pSrc->numRows;                /* number of rows */
+  uint16_t nColumns = pSrc->numCols;             /* number of columns  */
+  uint16_t col, row;                             /* loop counters */
+  arm_status status;                             /* status of matrix transpose */
+
+
+#ifdef ARM_MATH_MATRIX_CHECK
+
+  /* Check for matrix mismatch condition */
+  if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
+
+  {
+    /* Matrix transpose by exchanging the rows with columns.
+       The loops are pre-tested so that a matrix with zero rows or zero
+       columns is left untouched instead of underflowing the row counter. */
+    for (row = 0U; row < nRows; row++)
+    {
+      /* Source row 'row' becomes destination column 'row' */
+      px = pOut + CMPLX_DIM * row;
+
+      for (col = 0U; col < nColumns; col++)
+      {
+        /* Read and store the input element in the destination */
+        px[0] = *pIn++; /* real */
+        px[1] = *pIn++; /* imag */
+
+        /* Move to the next row of the transposed matrix */
+        px += CMPLX_DIM * nRows;
+      }
+    }
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+  /* Return to application */
+  return (status);
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+ * @} end of MatrixTrans group
+ */
--- a/Testing/PatternGeneration/Matrix.py
+++ b/Testing/PatternGeneration/Matrix.py
@ -602,6 +602,11 @@ def writeUnaryTests(config,format):
    if format == Tools.Q7:
       data1 = data1 / 4.0

+    data1C=randComplex(NBSAMPLES)
+
+    if format == Tools.Q7:
+       data1C = data1C / 4.0
+
    data2=np.random.randn(NBSAMPLES)
    data2 = Tools.normalize(data2) 

@ -612,6 +617,8 @@ def writeUnaryTests(config,format):


    config.writeInput(1, data1,"InputA")
+    config.writeInput(1, asReal(data1C),"InputAC")
+
    config.writeInput(1, data2,"InputB")
    config.writeInput(1, vecdata,"InputVec")

@ -664,6 +671,14 @@ def writeUnaryTests(config,format):
       vals = vals + r
    config.writeReference(1, vals,"RefTranspose")

+    vals = []
+    for (a,b) in unarySizes:
+       ma = np.copy(data1C[0:a*b]).reshape(a,b)
+       r = np.transpose(ma)
+       r = list(asReal(r.reshape(a*b)))
+       vals = vals + r
+    config.writeReference(1, vals,"RefTransposeC")
+
    vals = []
    for (a,b) in unarySizes:
       ma = np.copy(data1[0:a*b]).reshape(a,b)
--- a/Testing/Patterns/DSP/Matrix/Unary/UnaryF32/InputAC1_f32.txt
+++ b/Testing/Patterns/DSP/Matrix/Unary/UnaryF32/InputAC1_f32.txt
--- a/Testing/Patterns/DSP/Matrix/Unary/UnaryF32/RefTransposeC1_f32.txt
+++ b/Testing/Patterns/DSP/Matrix/Unary/UnaryF32/RefTransposeC1_f32.txt
--- a/Testing/Patterns/DSP/Matrix/Unary/UnaryF64/InputAC1_f64.txt
+++ b/Testing/Patterns/DSP/Matrix/Unary/UnaryF64/InputAC1_f64.txt
--- a/Testing/Patterns/DSP/Matrix/Unary/UnaryF64/RefTransposeC1_f64.txt
+++ b/Testing/Patterns/DSP/Matrix/Unary/UnaryF64/RefTransposeC1_f64.txt
--- a/Testing/Patterns/DSP/Matrix/Unary/UnaryQ15/InputAC1_q15.txt
+++ b/Testing/Patterns/DSP/Matrix/Unary/UnaryQ15/InputAC1_q15.txt
--- a/Testing/Patterns/DSP/Matrix/Unary/UnaryQ15/RefTransposeC1_q15.txt
+++ b/Testing/Patterns/DSP/Matrix/Unary/UnaryQ15/RefTransposeC1_q15.txt
--- a/Testing/Patterns/DSP/Matrix/Unary/UnaryQ31/InputAC1_q31.txt
+++ b/Testing/Patterns/DSP/Matrix/Unary/UnaryQ31/InputAC1_q31.txt
--- a/Testing/Patterns/DSP/Matrix/Unary/UnaryQ31/RefTransposeC1_q31.txt
+++ b/Testing/Patterns/DSP/Matrix/Unary/UnaryQ31/RefTransposeC1_q31.txt
--- a/Testing/Patterns/DSP/Matrix/Unary/UnaryQ7/InputAC1_q7.txt
+++ b/Testing/Patterns/DSP/Matrix/Unary/UnaryQ7/InputAC1_q7.txt
--- a/Testing/Patterns/DSP/Matrix/Unary/UnaryQ7/RefTransposeC1_q7.txt
+++ b/Testing/Patterns/DSP/Matrix/Unary/UnaryQ7/RefTransposeC1_q7.txt
--- a/Testing/Source/Benchmarks/UnaryF32.cpp
+++ b/Testing/Source/Benchmarks/UnaryF32.cpp
@ -17,6 +17,11 @@
       arm_mat_trans_f32(&this->in1,&this->out);
    } 

+    // Benchmarked operation: one complex f32 transpose of in1 into out.
+    void UnaryF32::test_mat_cmplx_trans_f32()
+    {
+       arm_mat_cmplx_trans_f32(&in1, &out);
+    }
+
    void UnaryF32::test_mat_add_f32()
    {     
       arm_mat_add_f32(&this->in1,&this->in1,&this->out);
@ -43,12 +48,30 @@
       switch(id)
       {
          case TEST_MAT_VEC_MULT_F32_6:
+             input1.reload(UnaryF32::INPUTA_F32_ID,mgr,this->nbr*this->nbc);
             vec.reload(UnaryF32::INPUTVEC1_F32_ID,mgr,this->nbc);
             output.create(this->nbr,UnaryF32::OUT_F32_ID,mgr);
             vecp=vec.ptr();
             outp=output.ptr();
          break;
+          case TEST_MAT_TRANS_F32_3:
+              input1.reload(UnaryF32::INPUTA_F32_ID,mgr,this->nbr*this->nbc);
+              output.create(this->nbr*this->nbc,UnaryF32::OUT_F32_ID,mgr);
+              
+              this->out.numRows = this->nbc;
+              this->out.numCols = this->nbr;
+              this->out.pData = output.ptr(); 
+          break;
+          case TEST_MAT_CMPLX_TRANS_F32_7:
+              input1.reload(UnaryF32::INPUTAC_F32_ID,mgr,2*this->nbr*this->nbc);
+              output.create(2*this->nbr*this->nbc,UnaryF32::OUT_F32_ID,mgr);
+              
+              this->out.numRows = this->nbc;
+              this->out.numCols = this->nbr;
+              this->out.pData = output.ptr(); 
+          break;
          default:
+              input1.reload(UnaryF32::INPUTA_F32_ID,mgr,this->nbr*this->nbc);
              output.create(this->nbr*this->nbc,UnaryF32::OUT_F32_ID,mgr);
              
              this->out.numRows = this->nbr;
@ -57,7 +80,6 @@
          break;
       }

-       input1.reload(UnaryF32::INPUTA_F32_ID,mgr,this->nbr*this->nbc);

       

--- a/Testing/Source/Benchmarks/UnaryQ15.cpp
+++ b/Testing/Source/Benchmarks/UnaryQ15.cpp
@ -12,6 +12,11 @@
       arm_mat_trans_q15(&this->in1,&this->out);
    } 

+    // Benchmarked operation: one complex Q15 transpose of in1 into out.
+    void UnaryQ15::test_mat_cmplx_trans_q15()
+    {
+       arm_mat_cmplx_trans_q15(&in1, &out);
+    }
+
    void UnaryQ15::test_mat_add_q15()
    {     
       arm_mat_add_q15(&this->in1,&this->in1,&this->out);
@ -38,12 +43,30 @@
       switch(id)
       {
          case TEST_MAT_VEC_MULT_Q15_5:
+             input1.reload(UnaryQ15::INPUTA_Q15_ID,mgr,this->nbr*this->nbc);
             vec.reload(UnaryQ15::INPUTVEC1_Q15_ID,mgr,this->nbc);
             output.create(this->nbr,UnaryQ15::OUT_Q15_ID,mgr);
             vecp=vec.ptr();
             outp=output.ptr();
          break;
+          case TEST_MAT_TRANS_Q15_2:
+              input1.reload(UnaryQ15::INPUTA_Q15_ID,mgr,this->nbr*this->nbc);
+              output.create(this->nbr*this->nbc,UnaryQ15::OUT_Q15_ID,mgr);
+              
+              this->out.numRows = this->nbc;
+              this->out.numCols = this->nbr;
+              this->out.pData = output.ptr(); 
+          break;
+          case TEST_MAT_CMPLX_TRANS_Q15_6:
+              input1.reload(UnaryQ15::INPUTAC_Q15_ID,mgr,2*this->nbr*this->nbc);
+              output.create(2*this->nbr*this->nbc,UnaryQ15::OUT_Q15_ID,mgr);
+              
+              this->out.numRows = this->nbc;
+              this->out.numCols = this->nbr;
+              this->out.pData = output.ptr(); 
+          break;
          default:
+              input1.reload(UnaryQ15::INPUTA_Q15_ID,mgr,this->nbr*this->nbc);
              output.create(this->nbr*this->nbc,UnaryQ15::OUT_Q15_ID,mgr);
              
              this->out.numRows = this->nbr;
@ -52,7 +75,7 @@
          break;
       }

-       input1.reload(UnaryQ15::INPUTA_Q15_ID,mgr,this->nbr*this->nbc);
+       

       
       this->in1.numRows = this->nbr;
--- a/Testing/Source/Benchmarks/UnaryQ31.cpp
+++ b/Testing/Source/Benchmarks/UnaryQ31.cpp
@ -12,6 +12,11 @@
       arm_mat_trans_q31(&this->in1,&this->out);
    } 

+    // Benchmarked operation: one complex Q31 transpose of in1 into out.
+    void UnaryQ31::test_mat_cmplx_trans_q31()
+    {
+       arm_mat_cmplx_trans_q31(&in1, &out);
+    }
+
    void UnaryQ31::test_mat_add_q31()
    {     
       arm_mat_add_q31(&this->in1,&this->in1,&this->out);
@ -38,12 +43,30 @@
       switch(id)
       {
          case TEST_MAT_VEC_MULT_Q31_5:
+             input1.reload(UnaryQ31::INPUTA_Q31_ID,mgr,this->nbr*this->nbc);
             vec.reload(UnaryQ31::INPUTVEC1_Q31_ID,mgr,this->nbc);
             output.create(this->nbr,UnaryQ31::OUT_Q31_ID,mgr);
             vecp=vec.ptr();
             outp=output.ptr();
          break;
+          case TEST_MAT_TRANS_Q31_2:
+              input1.reload(UnaryQ31::INPUTA_Q31_ID,mgr,this->nbr*this->nbc);
+              output.create(this->nbr*this->nbc,UnaryQ31::OUT_Q31_ID,mgr);
+              
+              this->out.numRows = this->nbc;
+              this->out.numCols = this->nbr;
+              this->out.pData = output.ptr(); 
+          break;
+          case TEST_MAT_CMPLX_TRANS_Q31_6:
+              input1.reload(UnaryQ31::INPUTAC_Q31_ID,mgr,2*this->nbr*this->nbc);
+              output.create(2*this->nbr*this->nbc,UnaryQ31::OUT_Q31_ID,mgr);
+              
+              this->out.numRows = this->nbc;
+              this->out.numCols = this->nbr;
+              this->out.pData = output.ptr(); 
+          break;
          default:
+              input1.reload(UnaryQ31::INPUTA_Q31_ID,mgr,this->nbr*this->nbc);
              output.create(this->nbr*this->nbc,UnaryQ31::OUT_Q31_ID,mgr);
              
              this->out.numRows = this->nbr;
@ -52,7 +75,6 @@
          break;
       }

-       input1.reload(UnaryQ31::INPUTA_Q31_ID,mgr,this->nbr*this->nbc);

      
       this->in1.numRows = this->nbr;
--- a/Testing/Source/Tests/UnaryTestsF32.cpp
+++ b/Testing/Source/Tests/UnaryTestsF32.cpp
@ -86,6 +86,24 @@ But big matrix needed for checking the vectorized code */
      }                                                                  \
      out.pData = outp;

+/*
+ * Set up the in1/out matrix instances for one complex test matrix.
+ *
+ * Expands in the caller's scope and uses the names rows, columns, inp1, ap,
+ * outp, in1 and out found there. It copies 2*rows*columns float32_t values
+ * from inp1 into ap (inp1 itself is not advanced), points in1 at ap with
+ * dimensions rows x columns, and points out at outp with dimensions
+ * columns x rows when TRANSPOSED is true, rows x columns otherwise.
+ *
+ * The expansion is a plain statement sequence (no do { } while (0) wrapper),
+ * so it must be used as a statement inside a braced block.
+ */
+#define PREPAREDATA1C(TRANSPOSED)                                         \
+      in1.numRows=rows;                                                  \
+      in1.numCols=columns;                                               \
+      memcpy((void*)ap,(const void*)inp1,2*sizeof(float32_t)*rows*columns);\
+      in1.pData = ap;                                                    \
+                                                                         \
+      if (TRANSPOSED)                                                    \
+      {                                                                  \
+         out.numRows=columns;                                            \
+         out.numCols=rows;                                               \
+      }                                                                  \
+      else                                                               \
+      {                                                                  \
+      out.numRows=rows;                                                  \
+      out.numCols=columns;                                               \
+      }                                                                  \
+      out.pData = outp;
+
 #define LOADVECDATA2()                          \
      const float32_t *inp1=input1.ptr();    \
      const float32_t *inp2=input2.ptr();    \
@ -234,6 +252,31 @@ void UnaryTestsF32::test_mat_trans_f32()

    } 

+/*
+ * Functional test for arm_mat_cmplx_trans_f32: transposes one complex matrix
+ * per (rows, columns) pair read from dimsp and compares the concatenated
+ * results against the reference pattern.
+ */
+void UnaryTestsF32::test_mat_cmplx_trans_f32()
+    {     
+      /* NOTE(review): LOADDATA1 is defined outside this hunk; it presumably
+         declares i, rows, columns, nbMatrixes and the inp1/ap/outp/dimsp
+         pointers used below - confirm against the macro. */
+      LOADDATA1();
+
+      for(i=0;i < nbMatrixes ; i ++)
+      {
+          /* Each test case is described by two consecutive dimension values */
+          rows = *dimsp++;
+          columns = *dimsp++;
+
+          /* Copy the complex input and configure out as columns x rows */
+          PREPAREDATA1C(true);
+
+          arm_mat_cmplx_trans_f32(&this->in1,&this->out);
+
+          /* Two floats (real, imag) were written per matrix element */
+          outp += 2*(rows * columns);
+
+      }
+
+      ASSERT_EMPTY_TAIL(output);
+
+      ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD);
+
+      ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR,REL_ERROR);
+
+    }
+
 void UnaryTestsF32::test_mat_inverse_f32()
    {     
      const float32_t *inp1=input1.ptr();    
@ -342,6 +385,16 @@ void UnaryTestsF32::test_mat_inverse_f32()
            a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr);
            b.create(MAXMATRIXDIM,UnaryTestsF32::TMPB_F32_ID,mgr);
         break;
+
+          case TEST_MAT_CMPLX_TRANS_F32_7: /* Patterns and buffers for the complex transpose test. */
+            input1.reload(UnaryTestsF32::INPUTSC1_F32_ID,mgr);
+            dims.reload(UnaryTestsF32::DIMSUNARY1_S16_ID,mgr);
+
+            ref.reload(UnaryTestsF32::REFTRANSC1_F32_ID,mgr);
+
+            output.create(ref.nbSamples(),UnaryTestsF32::OUT_F32_ID,mgr); /* Output holds as many samples as the reference. */
+            a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsF32::TMPA_F32_ID,mgr); /* NOTE(review): PREPAREDATA1C copies
+                 2*rows*columns values into its scratch pointer. If that pointer is this
+                 buffer, the capacity only suffices while
+                 rows*columns <= MAXMATRIXDIM*MAXMATRIXDIM/2. Confirm against the dims
+                 pattern, or size the buffer as 2*MAXMATRIXDIM*MAXMATRIXDIM. */
+         break;
      }
       

--- a/Testing/Source/Tests/UnaryTestsQ15.cpp
+++ b/Testing/Source/Tests/UnaryTestsQ15.cpp
@ -75,6 +75,24 @@ a double precision computation.
      }                                                                  \
      out.pData = outp;

+/*
+ * PREPAREDATA1C(TRANSPOSED): set up in1 and out for one complex-matrix step.
+ *
+ * Copies 2*rows*columns q15_t values from inp1 into ap (two values per
+ * matrix entry; presumably interleaved real/imaginary parts - confirm against
+ * the pattern generator) and points in1 at that copy with dimensions
+ * rows x columns. out is pointed at outp, with dimensions columns x rows when
+ * TRANSPOSED is true and rows x columns otherwise.
+ *
+ * Expands to bare statements that use the caller's in1, out, ap, inp1, outp,
+ * rows and columns. TRANSPOSED is evaluated exactly once.
+ */
+#define PREPAREDATA1C(TRANSPOSED)                                          \
+      memcpy((void*)ap,(const void*)inp1,2*sizeof(q15_t)*rows*columns);    \
+      in1.pData = ap;                                                      \
+      in1.numRows = rows;                                                  \
+      in1.numCols = columns;                                               \
+                                                                           \
+      /* Start from the non-transposed shape, swap it when requested. */   \
+      out.numRows = rows;                                                  \
+      out.numCols = columns;                                               \
+      if (TRANSPOSED)                                                      \
+      {                                                                    \
+         out.numRows = columns;                                            \
+         out.numCols = rows;                                               \
+      }                                                                    \
+      out.pData = outp;
+
 #define LOADVECDATA2()                          \
      const q15_t *inp1=input1.ptr();    \
      const q15_t *inp2=input2.ptr();    \
@ -222,6 +240,31 @@ void UnaryTestsQ15::test_mat_trans_q15()

    } 

+/*
+ * Runs arm_mat_cmplx_trans_q15 once per entry of the dims pattern and then
+ * compares the accumulated output with the reference pattern.
+ *
+ * Each iteration reads a (rows, columns) pair from dimsp, prepares in1/out
+ * with PREPAREDATA1C(true) so that out has the swapped dimensions, calls the
+ * function under test, and advances outp by 2*rows*columns values.
+ */
+void UnaryTestsQ15::test_mat_cmplx_trans_q15()
+{
+    LOADDATA1();
+
+    i = 0;
+    while (i < nbMatrixes)
+    {
+        /* The dims pattern stores consecutive (rows, columns) pairs. */
+        rows = dimsp[0];
+        columns = dimsp[1];
+        dimsp += 2;
+
+        PREPAREDATA1C(true);
+
+        arm_mat_cmplx_trans_q15(&this->in1, &this->out);
+
+        /* Two values are written per matrix entry. */
+        outp += 2*(rows * columns);
+
+        i++;
+    }
+
+    ASSERT_EMPTY_TAIL(output);
+    ASSERT_SNR(output, ref, (q15_t)SNR_THRESHOLD);
+    ASSERT_NEAR_EQ(output, ref, ABS_ERROR_Q15);
+}
+

    void UnaryTestsQ15::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
    {
@ -286,6 +329,16 @@ void UnaryTestsQ15::test_mat_trans_q15()
            b.create(MAXMATRIXDIM,UnaryTestsQ15::TMPB_Q15_ID,mgr);
         break;

+         case TEST_MAT_CMPLX_TRANS_Q15_6: /* Patterns and buffers for the complex transpose test. */
+            input1.reload(UnaryTestsQ15::INPUTSC1_Q15_ID,mgr);
+            dims.reload(UnaryTestsQ15::DIMSUNARY1_S16_ID,mgr);
+
+            ref.reload(UnaryTestsQ15::REFTRANSC1_Q15_ID,mgr);
+
+            output.create(ref.nbSamples(),UnaryTestsQ15::OUT_Q15_ID,mgr); /* Output holds as many samples as the reference. */
+            a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsQ15::TMPA_Q15_ID,mgr); /* NOTE(review): PREPAREDATA1C copies
+                 2*rows*columns values into its scratch pointer. If that pointer is this
+                 buffer, the capacity only suffices while
+                 rows*columns <= MAXMATRIXDIM*MAXMATRIXDIM/2. Confirm against the dims
+                 pattern, or size the buffer as 2*MAXMATRIXDIM*MAXMATRIXDIM. */
+         break;
+
        
      }
       
--- a/Testing/Source/Tests/UnaryTestsQ31.cpp
+++ b/Testing/Source/Tests/UnaryTestsQ31.cpp
@ -75,6 +75,24 @@ a double precision computation.
      }                                                                  \
      out.pData = outp;

+/*
+ * PREPAREDATA1C(TRANSPOSED): set up in1 and out for one complex-matrix step.
+ *
+ * Copies 2*rows*columns q31_t values from inp1 into ap (two values per
+ * matrix entry; presumably interleaved real/imaginary parts - confirm against
+ * the pattern generator) and points in1 at that copy with dimensions
+ * rows x columns. out is pointed at outp, with dimensions columns x rows when
+ * TRANSPOSED is true and rows x columns otherwise.
+ *
+ * Expands to bare statements that use the caller's in1, out, ap, inp1, outp,
+ * rows and columns. TRANSPOSED is evaluated exactly once.
+ */
+#define PREPAREDATA1C(TRANSPOSED)                                          \
+      memcpy((void*)ap,(const void*)inp1,2*sizeof(q31_t)*rows*columns);    \
+      in1.pData = ap;                                                      \
+      in1.numRows = rows;                                                  \
+      in1.numCols = columns;                                               \
+                                                                           \
+      /* Start from the non-transposed shape, swap it when requested. */   \
+      out.numRows = rows;                                                  \
+      out.numCols = columns;                                               \
+      if (TRANSPOSED)                                                      \
+      {                                                                    \
+         out.numRows = columns;                                            \
+         out.numCols = rows;                                               \
+      }                                                                    \
+      out.pData = outp;
+
 #define LOADVECDATA2()                          \
      const q31_t *inp1=input1.ptr();    \
      const q31_t *inp2=input2.ptr();    \
@ -222,6 +240,31 @@ void UnaryTestsQ31::test_mat_trans_q31()

    } 

+/*
+ * Runs arm_mat_cmplx_trans_q31 once per entry of the dims pattern and then
+ * compares the accumulated output with the reference pattern.
+ *
+ * Each iteration reads a (rows, columns) pair from dimsp, prepares in1/out
+ * with PREPAREDATA1C(true) so that out has the swapped dimensions, calls the
+ * function under test, and advances outp by 2*rows*columns values.
+ */
+void UnaryTestsQ31::test_mat_cmplx_trans_q31()
+{
+    LOADDATA1();
+
+    i = 0;
+    while (i < nbMatrixes)
+    {
+        /* The dims pattern stores consecutive (rows, columns) pairs. */
+        rows = dimsp[0];
+        columns = dimsp[1];
+        dimsp += 2;
+
+        PREPAREDATA1C(true);
+
+        arm_mat_cmplx_trans_q31(&this->in1, &this->out);
+
+        /* Two values are written per matrix entry. */
+        outp += 2*(rows * columns);
+
+        i++;
+    }
+
+    ASSERT_EMPTY_TAIL(output);
+    ASSERT_SNR(output, ref, (q31_t)SNR_THRESHOLD);
+    ASSERT_NEAR_EQ(output, ref, ABS_ERROR_Q31);
+}
+

    void UnaryTestsQ31::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
    {
@ -286,6 +329,16 @@ void UnaryTestsQ31::test_mat_trans_q31()
            b.create(MAXMATRIXDIM,UnaryTestsQ31::TMPB_Q31_ID,mgr);
         break;

+         case TEST_MAT_CMPLX_TRANS_Q31_6: /* Patterns and buffers for the complex transpose test. */
+            input1.reload(UnaryTestsQ31::INPUTSC1_Q31_ID,mgr);
+            dims.reload(UnaryTestsQ31::DIMSUNARY1_S16_ID,mgr);
+
+            ref.reload(UnaryTestsQ31::REFTRANSC1_Q31_ID,mgr);
+
+            output.create(ref.nbSamples(),UnaryTestsQ31::OUT_Q31_ID,mgr); /* Output holds as many samples as the reference. */
+            a.create(MAXMATRIXDIM*MAXMATRIXDIM,UnaryTestsQ31::TMPA_Q31_ID,mgr); /* NOTE(review): PREPAREDATA1C copies
+                 2*rows*columns values into its scratch pointer. If that pointer is this
+                 buffer, the capacity only suffices while
+                 rows*columns <= MAXMATRIXDIM*MAXMATRIXDIM/2. Confirm against the dims
+                 pattern, or size the buffer as 2*MAXMATRIXDIM*MAXMATRIXDIM. */
+         break;
+
        
      }
       
--- a/Testing/bench.txt
+++ b/Testing/bench.txt
@ -923,6 +923,7 @@ group Root {
                }

                Pattern INPUTA_F32_ID : InputA1_f32.txt 
+                Pattern INPUTAC_F32_ID : InputAC1_f32.txt 
                Pattern INPUTVEC1_F32_ID : InputVec1_f32.txt 

                Output  OUT_F32_ID : Output
@ -939,6 +940,7 @@ group Root {
                   Matrix Addition:test_mat_add_f32
                   Matrix Substraction:test_mat_sub_f32
                   Matrix Vector Multiplication:test_mat_vec_mult_f32
+                   Matrix Complex Transpose:test_mat_cmplx_trans_f32
                } -> PARAM1_ID
              }

@ -954,6 +956,7 @@ group Root {
                }

                Pattern INPUTA_Q31_ID : InputA1_q31.txt 
+                Pattern INPUTAC_Q31_ID : InputAC1_q31.txt 
                Pattern INPUTVEC1_Q31_ID : InputVec1_q31.txt 

                Output  OUT_Q31_ID : Output
@ -969,6 +972,7 @@ group Root {
                   Matrix Addition:test_mat_add_q31
                   Matrix Substraction:test_mat_sub_q31
                   Matrix Vector Multiplication:test_mat_vec_mult_q31
+                   Matrix Complex Transpose:test_mat_cmplx_trans_q31
                } -> PARAM1_ID
              }

@ -984,6 +988,7 @@ group Root {
                }

                Pattern INPUTA_Q15_ID : InputA1_q15.txt 
+                Pattern INPUTAC_Q15_ID : InputAC1_q15.txt 
                Pattern INPUTVEC1_Q15_ID : InputVec1_q15.txt 
                Output  OUT_Q15_ID : Output

@ -998,6 +1003,7 @@ group Root {
                   Matrix Addition:test_mat_add_q15
                   Matrix Substraction:test_mat_sub_q15
                   Matrix Vector Multiplication:test_mat_vec_mult_q15
+                   Matrix Complex Transpose:test_mat_cmplx_trans_q15
                } -> PARAM1_ID
              }

--- a/Testing/desc.txt
+++ b/Testing/desc.txt
@ -2284,6 +2284,8 @@ group Root {
                folder = UnaryF32

                Pattern INPUTS1_F32_ID : InputA1_f32.txt 
+                Pattern INPUTSC1_F32_ID : InputAC1_f32.txt 
+
                Pattern INPUTS2_F32_ID : InputB1_f32.txt 
                Pattern INPUTVEC1_F32_ID : InputVec1_f32.txt 

@ -2295,6 +2297,7 @@ group Root {
                Pattern REFSUB1_F32_ID : RefSub1_f32.txt
                Pattern REFSCALE1_F32_ID : RefScale1_f32.txt
                Pattern REFTRANS1_F32_ID : RefTranspose1_f32.txt
+                Pattern REFTRANSC1_F32_ID : RefTransposeC1_f32.txt
                Pattern REFINV1_F32_ID : RefInvert1_f32.txt
                Pattern REFVECMUL1_F32_ID : RefVecMul1_f32.txt

@ -2310,6 +2313,7 @@ group Root {
                 test matrix transpose:test_mat_trans_f32
                 test matrix inverse:test_mat_inverse_f32
                 test mat mult vec:test_mat_vec_mult_f32
+                 test matrix complex transpose:test_mat_cmplx_trans_f32
                }

              }
@ -2319,6 +2323,7 @@ group Root {
                folder = UnaryQ31

                Pattern INPUTS1_Q31_ID : InputA1_q31.txt 
+                Pattern INPUTSC1_Q31_ID : InputAC1_q31.txt 
                Pattern INPUTS2_Q31_ID : InputB1_q31.txt 
                Pattern INPUTVEC1_Q31_ID : InputVec1_q31.txt 

@ -2330,6 +2335,7 @@ group Root {
                Pattern REFSUB1_Q31_ID : RefSub1_q31.txt
                Pattern REFSCALE1_Q31_ID : RefScale1_q31.txt
                Pattern REFTRANS1_Q31_ID : RefTranspose1_q31.txt
+                Pattern REFTRANSC1_Q31_ID : RefTransposeC1_q31.txt
                Pattern REFINV1_Q31_ID : RefInvert1_q31.txt
                Pattern REFVECMUL1_Q31_ID : RefVecMul1_q31.txt

@ -2343,6 +2349,7 @@ group Root {
                 test matrix scale:test_mat_scale_q31
                 test matrix transpose:test_mat_trans_q31
                 test mat mult vec:test_mat_vec_mult_q31
+                 test matrix complex transpose:test_mat_cmplx_trans_q31
                }

              }
@ -2352,6 +2359,7 @@ group Root {
                folder = UnaryQ15

                Pattern INPUTS1_Q15_ID : InputA1_q15.txt 
+                Pattern INPUTSC1_Q15_ID : InputAC1_q15.txt 
                Pattern INPUTS2_Q15_ID : InputB1_q15.txt 
                Pattern INPUTVEC1_Q15_ID : InputVec1_q15.txt 

@ -2363,6 +2371,7 @@ group Root {
                Pattern REFSUB1_Q15_ID : RefSub1_q15.txt
                Pattern REFSCALE1_Q15_ID : RefScale1_q15.txt
                Pattern REFTRANS1_Q15_ID : RefTranspose1_q15.txt
+                Pattern REFTRANSC1_Q15_ID : RefTransposeC1_q15.txt
                Pattern REFINV1_Q15_ID : RefInvert1_q15.txt
                Pattern REFVECMUL1_Q15_ID : RefVecMul1_q15.txt

@ -2376,6 +2385,7 @@ group Root {
                 test matrix scale:test_mat_scale_q15
                 test matrix transpose:test_mat_trans_q15
                 test mat mult vec:test_mat_vec_mult_q15
+                 test matrix complex transpose:test_mat_cmplx_trans_q15
                }

              }
@ -2385,6 +2395,7 @@ group Root {
                folder = UnaryQ7

                Pattern INPUTS1_Q7_ID : InputA1_q7.txt 
+                Pattern INPUTSC1_Q7_ID : InputAC1_q7.txt 
                Pattern INPUTS2_Q7_ID : InputB1_q7.txt 
                Pattern INPUTVEC1_Q7_ID : InputVec1_q7.txt 

@ -2396,6 +2407,7 @@ group Root {
                Pattern REFSUB1_Q7_ID : RefSub1_q7.txt
                Pattern REFSCALE1_Q7_ID : RefScale1_q7.txt
                Pattern REFTRANS1_Q7_ID : RefTranspose1_q7.txt
+                Pattern REFTRANSC1_Q7_ID : RefTransposeC1_q7.txt
                Pattern REFINV1_Q7_ID : RefInvert1_q7.txt
                Pattern REFVECMUL1_Q7_ID : RefVecMul1_q7.txt