CMSIS-DSP: Added complex math f16

6 years ago · ebf9104c4e
parent 8b465544a1
commit ebf9104c4e
29 changed files with 8557 additions and 2 deletions
--- a/Include/arm_helium_utils.h
+++ b/Include/arm_helium_utils.h
@ -80,6 +80,8 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
 /* newton initial guess */
 #define INVSQRT_MAGIC_F32           0x5f3759df
 #define INV_NEWTON_INIT_F32         0x7EF127EA
 #define INVSQRT_NEWTON_MVE_F32(invSqrt, xHalf, xStart)\
 {                                                     \
@ -95,6 +97,74 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
 }
 #endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) */
 /***************************************
 Definitions available for f16 datatype with HW acceleration only
 ***************************************/
 #if defined (ARM_MATH_MVE_FLOAT16)
 /*
  * Horizontal reduction of a vector holding four interleaved complex f16
  * values (re0, im0, re1, im1, re2, im2, re3, im3).
  *
  * Returns the full intermediate vector: the complete complex sum ends up in
  * lane 4 (real part) and lane 5 (imaginary part); the other lanes hold
  * partial sums and must not be interpreted as the result.
  */
 __STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16(
    float16x8_t   vecIn)
 {
    float16x8_t   vecTmp, vecOut;
    /*
     * Carry word consumed (and overwritten) by vshlcq. Its bits are shifted
     * into the bottom 32 bits of the vector, i.e. into lanes that are never
     * read as a result, but it must still be initialised so that no
     * indeterminate value is ever read.
     */
    uint32_t    tmp = 0;
    /* swap the two complex values inside each 64-bit half of the vector */
    vecTmp = (float16x8_t) vrev64q_s32((int32x4_t) vecIn);
    // TO TRACK : using canonical addition leads to inefficient code generation for f16
    // vecTmp = vecTmp + vecAccCpx0;
    /*
     * Compute
     *  re0+re1 | im0+im1 | re0+re1 | im0+im1
     *  re2+re3 | im2+im3 | re2+re3 | im2+im3
     */
    vecTmp = vaddq(vecTmp, vecIn);
    vecOut = vecTmp;
    /*
     * shift the whole vector left by 32 bits (one complex f16 value),
     * the bottom 32 bits are filled from tmp
     */
    vecOut = vreinterpretq_f16_s32(vshlcq_s32(vreinterpretq_s32_f16(vecOut)   , &tmp, 32));
    /*
     * Compute:
     *    DONTCARE     |    DONTCARE     | re0+re1+re0+re1 |im0+im1+im0+im1
     * re0+re1+re2+re3 | im0+im1+im2+im3 | re2+re3+re2+re3 |im2+im3+im2+im3
     */
    vecOut = vaddq(vecOut, vecTmp);
    /*
     * Cmplx sum is in lanes 4 (real) & 5 (imag) of the f16 vector
     * return full vector
     */
    return vecOut;
 }
 /*
  * Reduce the complex f16 vector `vec` with __mve_cmplx_sum_intra_vec_f16()
  * and extract the result: `Re` receives lane 4 and `Im` receives lane 5 of
  * the reduced vector. `Re` and `Im` must be assignable lvalues.
  *
  * NOTE(review): the expansion is a bare { } block that declares a local
  * named vecOut, so an argument expression that itself refers to a variable
  * called vecOut would be shadowed by it.
  */
 #define mve_cmplx_sum_intra_r_i_f16(vec, Re, Im)                \
 {                                                               \
    float16x8_t   vecOut = __mve_cmplx_sum_intra_vec_f16(vec);    \
    Re = vgetq_lane(vecOut, 4);                                 \
    Im = vgetq_lane(vecOut, 5);                                 \
 }
 #define INVSQRT_MAGIC_F16           0x59ba      /*  ( 0x1ba = 0x3759df >> 13) */
 #define INV_NEWTON_INIT_F16         0x7773
 /*
  * One Newton-Raphson refinement step of an inverse square root estimate,
  * applied lane-wise on f16 vectors:
  *
  *     invSqrt = xStart * (1.5 - xHalf * xStart * xStart)
  *
  * xStart is the current estimate and invSqrt receives the refined one.
  * xHalf is expected to hold half of the value whose inverse square root is
  * being computed (the visible caller passes sum * 0.5).
  *
  * Written with explicit intrinsics on purpose: the canonical (operator
  * based) version of INVSQRT_NEWTON_MVE_F16 leads to bad performance.
  */
 #define INVSQRT_NEWTON_MVE_F16(invSqrt, xHalf, xStart)                  \
 {                                                                       \
    float16x8_t tmp;                                                      \
                                                                        \
    /* tmp = xhalf * x * x */                                           \
    tmp = vmulq(xStart, xStart);                                        \
    tmp = vmulq(tmp, xHalf);                                            \
    /* (1.5f - xhalf * x * x) */                                        \
    tmp = vsubq(vdupq_n_f16((float16_t)1.5), tmp);                      \
    /* x = x*(1.5f-xhalf*x*x); */                                       \
    invSqrt = vmulq(tmp, xStart);                                       \
 }
 #endif
 /***************************************
 Definitions available for MVEI only
--- a/Include/dsp/basic_math_functions_f16.h
+++ b/Include/dsp/basic_math_functions_f16.h
@ -36,6 +36,7 @@ extern "C"
 #include "dsp/none.h"
 #include "dsp/utils.h"
 #include "dsp/fast_math_functions_f16.h"
 #if defined(ARM_FLOAT16_SUPPORTED)
--- a/Include/dsp/complex_math_functions_f16.h
+++ b/Include/dsp/complex_math_functions_f16.h
@ -26,12 +26,94 @@
 #ifndef _COMPLEX_MATH_FUNCTIONS_F16_H_
 #define _COMPLEX_MATH_FUNCTIONS_F16_H_
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
 #include "dsp/none.h"
 #include "dsp/utils.h"
 #include "dsp/fast_math_functions_f16.h"
 #ifdef   __cplusplus
 extern "C"
 {
 #endif
 #if defined(ARM_FLOAT16_SUPPORTED)
 /**
   * @brief  Floating-point complex conjugate.
   * @param[in]  pSrc        points to the input vector (interleaved real, imag; 2*numSamples values)
   * @param[out] pDst        points to the output vector (interleaved real, imag; 2*numSamples values)
   * @param[in]  numSamples  number of complex samples in each vector
   */
  void arm_cmplx_conj_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples);
 /**
   * @brief  Floating-point complex magnitude squared
   * @param[in]  pSrc        points to the complex input vector (interleaved real, imag; 2*numSamples values)
   * @param[out] pDst        points to the real output vector (numSamples values)
   * @param[in]  numSamples  number of complex samples in the input vector
   */
  void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples);
  /**
   * @brief  Floating-point complex magnitude
   * @param[in]  pSrc        points to the complex input vector (interleaved real, imag; 2*numSamples values)
   * @param[out] pDst        points to the real output vector (numSamples values)
   * @param[in]  numSamples  number of complex samples in the input vector
   */
  void arm_cmplx_mag_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples);
  /**
   * @brief  Floating-point complex dot product
   * @param[in]  pSrcA       points to the first input vector (interleaved real, imag; 2*numSamples values)
   * @param[in]  pSrcB       points to the second input vector (interleaved real, imag; 2*numSamples values)
   * @param[in]  numSamples  number of complex samples in each vector
   * @param[out] realResult  real part of the result returned here
   * @param[out] imagResult  imaginary part of the result returned here
   */
  void arm_cmplx_dot_prod_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
        uint32_t numSamples,
        float16_t * realResult,
        float16_t * imagResult);
   /**
   * @brief  Floating-point complex-by-real multiplication
   * @param[in]  pSrcCmplx   points to the complex input vector
   * @param[in]  pSrcReal    points to the real input vector
   * @param[out] pCmplxDst   points to the complex output vector
   * @param[in]  numSamples  number of samples in each vector
   */
  void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples);
  /**
   * @brief  Floating-point complex-by-complex multiplication
   * @param[in]  pSrcA       points to the first input vector (interleaved real, imag; 2*numSamples values)
   * @param[in]  pSrcB       points to the second input vector (interleaved real, imag; 2*numSamples values)
   * @param[out] pDst        points to the output vector (interleaved real, imag; 2*numSamples values)
   * @param[in]  numSamples  number of complex samples in each vector
   */
  void arm_cmplx_mult_cmplx_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
        float16_t * pDst,
        uint32_t numSamples);
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
--- a/Include/dsp/fast_math_functions_f16.h
+++ b/Include/dsp/fast_math_functions_f16.h
@ -26,12 +26,46 @@
 #ifndef _FAST_MATH_FUNCTIONS_F16_H_
 #define _FAST_MATH_FUNCTIONS_F16_H_
 #include "arm_math_types_f16.h"
 #include "arm_math_memory.h"
 #include "dsp/none.h"
 #include "dsp/utils.h"
 #include "dsp/fast_math_functions.h"
 #ifdef   __cplusplus
 extern "C"
 {
 #endif
 #if defined(ARM_FLOAT16_SUPPORTED)
 /**
   * @addtogroup SQRT
   * @{
   */
 /**
  @brief         Half-precision floating-point square root.
  @param[in]     in    input value
  @param[out]    pOut  receives the square root of the input value
  @return        execution status, forwarded unchanged from arm_sqrt_f32
                   - \ref ARM_MATH_SUCCESS        : input value is positive
                   - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0

  The input is widened to single precision, the root is computed by
  arm_sqrt_f32 and the result is narrowed back to half precision.
 */
 __STATIC_FORCEINLINE arm_status arm_sqrt_f16(
  float16_t in,
  float16_t * pOut)
  {
    float32_t root32;                 /* single-precision root written by arm_sqrt_f32 */
    const arm_status rc = arm_sqrt_f32((float32_t) in, &root32);

    *pOut = (float16_t) root32;
    return rc;
  }
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
--- a/Source/ComplexMathFunctions/CMakeLists.txt
+++ b/Source/ComplexMathFunctions/CMakeLists.txt
@ -5,8 +5,6 @@ project(CMSISDSPComplexMath)
 include(configLib)
 include(configDsp)
 file(GLOB SRC "./*_*.c")
 add_library(CMSISDSPComplexMath STATIC)
 configLib(CMSISDSPComplexMath ${ROOT})
@ -56,6 +54,14 @@ target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_f32.c)
 target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_q15.c)
 target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_q31.c)
 # Half-precision (f16) complex math kernels: only added to the library when
 # the build is not using ARMAC5 and DISABLEFLOAT16 is not set.
 if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
 target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_conj_f16.c)
 target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_dot_prod_f16.c)
 target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mag_f16.c)
 target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mag_squared_f16.c)
 target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_cmplx_f16.c)
 target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_f16.c)
 endif()
 ### Includes
 target_include_directories(CMSISDSPComplexMath PUBLIC "${DSP}/Include")
--- a/Source/ComplexMathFunctions/ComplexMathFunctionsF16.c
+++ b/Source/ComplexMathFunctions/ComplexMathFunctionsF16.c
@ -0,0 +1,32 @@
 /* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        ComplexMathFunctionsF16.c
 * Description:  Combination of all complex math function f16 source files.
 *
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
 * Copyright (C) 2020 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "arm_cmplx_conj_f16.c"
 #include "arm_cmplx_dot_prod_f16.c"
 #include "arm_cmplx_mag_f16.c"
 #include "arm_cmplx_mag_squared_f16.c"
 #include "arm_cmplx_mult_cmplx_f16.c"
 #include "arm_cmplx_mult_real_f16.c"
--- a/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c
@ -0,0 +1,183 @@
 /* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_cmplx_conj_f16.c
 * Description:  Floating-point complex conjugate
 *
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
 * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "dsp/complex_math_functions_f16.h"
 #if defined(ARM_FLOAT16_SUPPORTED)
 /**
  @ingroup groupCmplxMath
 */
 /**
  @defgroup cmplx_conj Complex Conjugate
  Conjugates the elements of a complex data vector.
  The <code>pSrc</code> points to the source data and
  <code>pDst</code> points to the destination data where the result should be written.
  <code>numSamples</code> specifies the number of complex samples
  and the data in each array is stored in an interleaved fashion
  (real, imag, real, imag, ...).
  Each array has a total of <code>2*numSamples</code> values.
  The underlying algorithm is used:
  <pre>
  for (n = 0; n < numSamples; n++) {
      pDst[(2*n)  ] =  pSrc[(2*n)  ];    // real part
      pDst[(2*n)+1] = -pSrc[(2*n)+1];    // imag part
  }
  </pre>
  There are separate functions for floating-point, Q15, and Q31 data types.
 */
 /**
  @addtogroup cmplx_conj
  @{
 */
 /**
  @brief         Floating-point complex conjugate.
  @param[in]     pSrc        points to the input vector
  @param[out]    pDst        points to the output vector
  @param[in]     numSamples  number of samples in each vector
  @return        none
 */
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 /*
  * Helium (MVE) implementation.
  *
  * Full vectors of 8 f16 values (4 complex samples) are conjugated by a
  * lane-wise multiplication with the sign pattern (+1, -1, +1, -1, ...);
  * the remaining complex samples are handled by a scalar loop.
  */
 void arm_cmplx_conj_f16(
    const float16_t * pSrc,
    float16_t * pDst,
    uint32_t numSamples)
 {
    static const float16_t cmplx_conj_sign[8] = { 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f };
    uint32_t blockSize = numSamples * CMPLX_DIM;   /* total number of f16 values to process */
    uint32_t blkCnt;
    f16x8_t vecSrc;
    f16x8_t vecSign;
    /*
     * load sign vector
     *
     * Use the load intrinsic instead of dereferencing the array through a
     * casted vector pointer: the array is only guaranteed to have the
     * alignment of float16_t, and the cast also discarded the const
     * qualifier.
     */
    vecSign = vld1q(cmplx_conj_sign);
    /* Compute 4 complex samples (8 f16 values) at a time */
    blkCnt = blockSize >> 3U;
    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrc);
        vst1q(pDst,vmulq(vecSrc, vecSign));
        /*
         * Decrement the blkCnt loop counter
         * Advance vector source and destination pointers
         */
        pSrc += 8;
        pDst += 8;
        blkCnt--;
    }
     /* Tail: remaining complex samples (f16 values left over, divided by 2) */
    blkCnt = (blockSize & 0x7) >> 1;
    while (blkCnt > 0U)
    {
      /* C[0] + jC[1] = A[0]+ j(-1)A[1] */
      /* Calculate Complex Conjugate and store result in destination buffer. */
      *pDst++ =  *pSrc++;
      *pDst++ = -*pSrc++;
      /* Decrement loop counter */
      blkCnt--;
    }
 }
 #else
 /*
  * Portable scalar implementation: copies every real part unchanged and
  * negates every imaginary part of the interleaved input.
  */
 void arm_cmplx_conj_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
 {
        uint32_t remaining;                            /* complex samples left to process */
 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Unrolled part: four complex samples (eight f16 values) per pass */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* C[0] + jC[1] = A[0] - jA[1], four times in a row */
    pDst[0] =  pSrc[0];
    pDst[1] = -pSrc[1];
    pDst[2] =  pSrc[2];
    pDst[3] = -pSrc[3];
    pDst[4] =  pSrc[4];
    pDst[5] = -pSrc[5];
    pDst[6] =  pSrc[6];
    pDst[7] = -pSrc[7];
    pSrc += 8;
    pDst += 8;
  }
  /* Samples not covered by the unrolled part */
  remaining = numSamples & 0x3U;
 #else
  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;
 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  for (; remaining > 0U; remaining--)
  {
    /* keep the real part, flip the sign of the imaginary part */
    pDst[0] =  pSrc[0];
    pDst[1] = -pSrc[1];
    pSrc += 2;
    pDst += 2;
  }
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 /**
  @} end of cmplx_conj group
 */
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
@ -0,0 +1,236 @@
 /* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_cmplx_dot_prod_f16.c
 * Description:  Floating-point complex dot product
 *
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
 * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "dsp/complex_math_functions_f16.h"
 #if defined(ARM_FLOAT16_SUPPORTED)
 /**
  @ingroup groupCmplxMath
 */
 /**
  @defgroup cmplx_dot_prod Complex Dot Product
  Computes the dot product of two complex vectors.
  The vectors are multiplied element-by-element and then summed.
  The <code>pSrcA</code> points to the first complex input vector and
  <code>pSrcB</code> points to the second complex input vector.
  <code>numSamples</code> specifies the number of complex samples
  and the data in each array is stored in an interleaved fashion
  (real, imag, real, imag, ...).
  Each array has a total of <code>2*numSamples</code> values.
  The underlying algorithm is used:
  <pre>
  realResult = 0;
  imagResult = 0;
  for (n = 0; n < numSamples; n++) {
      realResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
      imagResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
  }
  </pre>
  There are separate functions for floating-point, Q15, and Q31 data types.
 */
 /**
  @addtogroup cmplx_dot_prod
  @{
 */
 /**
  @brief         Floating-point complex dot product.
  @param[in]     pSrcA       points to the first input vector
  @param[in]     pSrcB       points to the second input vector
  @param[in]     numSamples  number of samples in each vector
  @param[out]    realResult  real part of the result returned here
  @param[out]    imagResult  imaginary part of the result returned here
  @return        none
 */
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 #include "arm_helium_utils.h"
 /*
  * Helium (MVE) implementation.
  *
  * Accumulates the complex products lane-wise in vec_acc using the complex
  * multiply-accumulate intrinsics (rotation 0 followed by rotation 90), then
  * reduces the accumulator to a single complex value.
  */
 void arm_cmplx_dot_prod_f16(
    const float16_t * pSrcA,
    const float16_t * pSrcB,
    uint32_t numSamples,
    float16_t * realResult,
    float16_t * imagResult)
 {
    uint32_t blockSize = numSamples * CMPLX_DIM;  /* total number of f16 values per input */
    uint32_t blkCnt;
    float16_t real_sum, imag_sum;
    f16x8_t vecSrcA, vecSrcB;
    f16x8_t vec_acc = vdupq_n_f16(0.0f);
    /* Compute 4 complex samples (8 f16 values) at a time */
    blkCnt = blockSize >> 3U;
    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);
        vec_acc = vcmlaq(vec_acc, vecSrcA, vecSrcB);
        vec_acc = vcmlaq_rot90(vec_acc, vecSrcA, vecSrcB);
        /*
         * Decrement the blkCnt loop counter
         * Advance vector source and destination pointers
         */
        pSrcA += 8;
        pSrcB += 8;
        blkCnt--;
    }
    /* Tail: f16 values left over after the full vectors */
    blkCnt = (blockSize & 7);
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);
        /*
         * Predicated (zeroing) loads: only the blkCnt active lanes are read
         * from memory, so the tail never reads past the end of the input
         * buffers. Inactive lanes do not contribute to the result because
         * the accumulations below are predicated with the same mask.
         */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);
        vec_acc = vcmlaq_m(vec_acc, vecSrcA, vecSrcB, p0);
        vec_acc = vcmlaq_rot90_m(vec_acc, vecSrcA, vecSrcB, p0);
    }
    /* Sum the partial parts */
    mve_cmplx_sum_intra_r_i_f16(vec_acc, real_sum, imag_sum);
    /*
     * Store the real and imaginary results in the destination buffers
     */
    *realResult = real_sum;
    *imagResult = imag_sum;
 }
 #else
 /*
  * Portable scalar implementation: accumulates
  *   realResult = sum(aRe*bRe - aIm*bIm)
  *   imagResult = sum(aRe*bIm + aIm*bRe)
  * over all complex samples, in input order.
  */
 void arm_cmplx_dot_prod_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
        uint32_t numSamples,
        float16_t * realResult,
        float16_t * imagResult)
 {
        uint32_t remaining;                            /* complex samples left to process */
        float16_t real_sum = 0.0f, imag_sum = 0.0f;    /* running real / imaginary sums */
        float16_t aRe, aIm, bRe, bIm;                  /* current sample of A and of B */
 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Unrolled part: four complex products per pass */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* sample 0 */
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    real_sum += aRe * bRe;
    real_sum -= aIm * bIm;
    imag_sum += aRe * bIm;
    imag_sum += aIm * bRe;
    /* sample 1 */
    aRe = pSrcA[2];
    aIm = pSrcA[3];
    bRe = pSrcB[2];
    bIm = pSrcB[3];
    real_sum += aRe * bRe;
    real_sum -= aIm * bIm;
    imag_sum += aRe * bIm;
    imag_sum += aIm * bRe;
    /* sample 2 */
    aRe = pSrcA[4];
    aIm = pSrcA[5];
    bRe = pSrcB[4];
    bIm = pSrcB[5];
    real_sum += aRe * bRe;
    real_sum -= aIm * bIm;
    imag_sum += aRe * bIm;
    imag_sum += aIm * bRe;
    /* sample 3 */
    aRe = pSrcA[6];
    aIm = pSrcA[7];
    bRe = pSrcB[6];
    bIm = pSrcB[7];
    real_sum += aRe * bRe;
    real_sum -= aIm * bIm;
    imag_sum += aRe * bIm;
    imag_sum += aIm * bRe;
    pSrcA += 8;
    pSrcB += 8;
  }
  /* Samples not covered by the unrolled part */
  remaining = numSamples & 0x3U;
 #else
  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;
 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  for (; remaining > 0U; remaining--)
  {
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    real_sum += aRe * bRe;
    real_sum -= aIm * bIm;
    imag_sum += aRe * bIm;
    imag_sum += aIm * bRe;
    pSrcA += 2;
    pSrcB += 2;
  }
  /* Hand both parts of the result back to the caller */
  *realResult = real_sum;
  *imagResult = imag_sum;
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 /**
  @} end of cmplx_dot_prod group
 */
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
@ -0,0 +1,239 @@
 /* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_cmplx_mag_f16.c
 * Description:  Floating-point complex magnitude
 *
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
 * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "dsp/complex_math_functions_f16.h"
 #if defined(ARM_FLOAT16_SUPPORTED)
 /**
  @ingroup groupCmplxMath
 */
 /**
  @defgroup cmplx_mag Complex Magnitude
  Computes the magnitude of the elements of a complex data vector.
  The <code>pSrc</code> points to the source data and
  <code>pDst</code> points to where the result should be written.
  <code>numSamples</code> specifies the number of complex samples
  in the input array and the data is stored in an interleaved fashion
  (real, imag, real, imag, ...).
  The input array has a total of <code>2*numSamples</code> values;
  the output array has a total of <code>numSamples</code> values.
  The underlying algorithm is used:
  <pre>
  for (n = 0; n < numSamples; n++) {
      pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
  }
  </pre>
  There are separate functions for floating-point, Q15, and Q31 data types.
 */
 /**
  @addtogroup cmplx_mag
  @{
 */
 /**
  @brief         Floating-point complex magnitude.
  @param[in]     pSrc        points to input vector
  @param[out]    pDst        points to output vector
  @param[in]     numSamples  number of samples in each vector
  @return        none
 */
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 #include "arm_helium_utils.h"
 /*
  * Helium (MVE) implementation.
  *
  * For each group of 8 complex samples the squared magnitude re^2 + im^2 is
  * computed lane-wise, then the square root is obtained as
  * x * invSqrt(x), where invSqrt(x) comes from a bit-trick initial estimate
  * (INVSQRT_MAGIC_F16) refined by Newton-Raphson steps
  * (INVSQRT_NEWTON_MVE_F16).
  */
 void arm_cmplx_mag_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
 {
    int32_t blockSize = numSamples;  /* number of complex samples to process */
    uint32_t  blkCnt;           /* loop counters */
    f16x8x2_t vecSrc;
    f16x8_t sum;
    /* Compute 8 complex samples (16 f16 values) at a time */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        q15x8_t newtonStartVec;
        f16x8_t sumHalf, invSqrt;
        /* de-interleaving load: val[0] and val[1] hold the even / odd f16 values */
        vecSrc = vld2q(pSrc);  
        pSrc += 16;
        /* sum = val[0]^2 + val[1]^2 */
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        /*
         * inlined Fast SQRT using inverse SQRT newton-raphson method
         */
        /* compute initial value: magic constant minus (bit pattern of sum >> 1) */
        newtonStartVec = vdupq_n_s16(INVSQRT_MAGIC_F16) - vshrq((q15x8_t) sum, 1);
        sumHalf = sum * 0.5f;
        /*
         * compute 3 x iterations
         *
         * The more iterations, the more accuracy.
         * If you need to trade a bit of accuracy for more performance,
         * you can comment out the 3rd use of the macro.
         */
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, (f16x8_t) newtonStartVec);
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
        /*
         * set negative values to 0
         */
        invSqrt = vdupq_m(invSqrt, (float16_t)0.0f, vcmpltq(invSqrt, (float16_t)0.0f));
        /*
         * sqrt(x) = x * invSqrt(x)
         */
        sum = vmulq(sum, invSqrt);
        vstrhq_f16(pDst, sum); 
        pDst += 8;
        /*
         * Decrement the blkCnt loop counter
         */
        blkCnt--;
    }
    /*
     * tail: remaining (blockSize mod 8) complex samples, stored under predicate
     */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);
        q15x8_t newtonStartVec;
        f16x8_t sumHalf, invSqrt;
        /*
         * NOTE(review): this load is not predicated, so it fetches a full
         * group of 16 f16 values although fewer remain in the input -
         * confirm that reading past the end of pSrc is acceptable here.
         */
        vecSrc = vld2q((float16_t const *)pSrc);
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        /*
         * inlined Fast SQRT using inverse SQRT newton-raphson method
         */
        /* compute initial value */
        newtonStartVec = vdupq_n_s16(INVSQRT_MAGIC_F16) - vshrq((q15x8_t) sum, 1);
        sumHalf = vmulq(sum, (float16_t)0.5);
        /*
         * compute 2 x iterations
         *
         * NOTE(review): one iteration fewer than the main loop, so tail
         * outputs are refined less than the others - confirm this is intended.
         */
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, (f16x8_t) newtonStartVec);
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
        /*
         * set negative values to 0
         */
        invSqrt = vdupq_m(invSqrt, (float16_t)0.0, vcmpltq(invSqrt, (float16_t)0.0));
        /*
         * sqrt(x) = x * invSqrt(x)
         */
        sum = vmulq(sum, invSqrt);
        /* only the first blkCnt lanes are written */
        vstrhq_p_f16(pDst, sum, p0);
    }
 }
 #else
 /*
  * Portable scalar implementation: for every complex sample writes
  * sqrt(re*re + im*im), computed through arm_sqrt_f16, to the output.
  */
 void arm_cmplx_mag_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
 {
  uint32_t remaining;                            /* complex samples left to process */
  float16_t re, im;                          /* real / imaginary part of the current sample */
 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Unrolled part: four magnitudes per pass */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* C[n] = sqrt(A[2n] * A[2n] + A[2n+1] * A[2n+1]) */
    re = pSrc[0];
    im = pSrc[1];
    arm_sqrt_f16((re * re) + (im * im), &pDst[0]);
    re = pSrc[2];
    im = pSrc[3];
    arm_sqrt_f16((re * re) + (im * im), &pDst[1]);
    re = pSrc[4];
    im = pSrc[5];
    arm_sqrt_f16((re * re) + (im * im), &pDst[2]);
    re = pSrc[6];
    im = pSrc[7];
    arm_sqrt_f16((re * re) + (im * im), &pDst[3]);
    pSrc += 8;
    pDst += 4;
  }
  /* Samples not covered by the unrolled part */
  remaining = numSamples & 0x3U;
 #else
  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;
 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  for (; remaining > 0U; remaining--)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
    re = pSrc[0];
    im = pSrc[1];
    arm_sqrt_f16((re * re) + (im * im), pDst);
    pSrc += 2;
    pDst += 1;
  }
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 /**
  @} end of cmplx_mag group
 */
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
@ -0,0 +1,172 @@
 /* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_cmplx_mag_squared_f16.c
 * Description:  Floating-point complex magnitude squared
 *
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
 * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "dsp/complex_math_functions_f16.h"
 #if defined(ARM_FLOAT16_SUPPORTED)
 /**
  @ingroup groupCmplxMath
 */
 /**
  @defgroup cmplx_mag_squared Complex Magnitude Squared
  Computes the magnitude squared of the elements of a complex data vector.
  The <code>pSrc</code> points to the source data and
  <code>pDst</code> points to where the result should be written.
  <code>numSamples</code> specifies the number of complex samples
  in the input array and the data is stored in an interleaved fashion
  (real, imag, real, imag, ...).
  The input array has a total of <code>2*numSamples</code> values;
  the output array has a total of <code>numSamples</code> values.
  The underlying algorithm is used:
  <pre>
  for (n = 0; n < numSamples; n++) {
      pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
  }
  </pre>
  There are separate functions for floating-point, Q15, and Q31 data types.
 */
 /**
  @addtogroup cmplx_mag_squared
  @{
 */
 /**
  @brief         Floating-point complex magnitude squared.
  @param[in]     pSrc        points to input vector
  @param[out]    pDst        points to output vector
  @param[in]     numSamples  number of samples in each vector
  @return        none
 */
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 /*
  * Helium (MVE) implementation: a single tail-predicated loop that handles
  * up to 8 complex samples (16 f16 values) per pass.
  */
 void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
 {
    int32_t remaining = numSamples;  /* complex samples left to process */
    for (; remaining > 0; remaining -= 8)
    {
        /* lanes beyond the remaining sample count are disabled */
        const mve_pred16_t active = vctp16q(remaining);
        /*
         * De-interleaving load: val[0] and val[1] hold the even / odd f16 values.
         * NOTE(review): the load itself is not predicated, so the last pass
         * may fetch more than the remaining samples - confirm acceptable.
         */
        const f16x8x2_t pairs = vld2q(pSrc);
        f16x8_t magSq;
        /* magSq = val[0]^2 + val[1]^2 on the active lanes */
        magSq = vmulq_m(vuninitializedq_f16(),pairs.val[0], pairs.val[0],active);
        magSq = vfmaq_m(magSq, pairs.val[1], pairs.val[1],active);
        /* only the active lanes are written to the output */
        vstrhq_p_f16(pDst, magSq,active);
        pSrc += 16;
        pDst += 8;
    }
 }
 #else
 /*
  * Portable scalar implementation: for every complex sample writes
  * re*re + im*im to the output.
  */
 void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
 {
        uint32_t remaining;                            /* complex samples left to process */
        float16_t re, im;                              /* real / imaginary part of the current sample */
 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Unrolled part: four outputs per pass */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* C[n] = (A[2n] * A[2n] + A[2n+1] * A[2n+1]) */
    re = pSrc[0];
    im = pSrc[1];
    pDst[0] = (re * re) + (im * im);
    re = pSrc[2];
    im = pSrc[3];
    pDst[1] = (re * re) + (im * im);
    re = pSrc[4];
    im = pSrc[5];
    pDst[2] = (re * re) + (im * im);
    re = pSrc[6];
    im = pSrc[7];
    pDst[3] = (re * re) + (im * im);
    pSrc += 8;
    pDst += 4;
  }
  /* Samples not covered by the unrolled part */
  remaining = numSamples & 0x3U;
 #else
  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;
 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  for (; remaining > 0U; remaining--)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    re = pSrc[0];
    im = pSrc[1];
    pDst[0] = (re * re) + (im * im);
    pSrc += 2;
    pDst += 1;
  }
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 /**
  @} end of cmplx_mag_squared group
 */
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
@ -0,0 +1,217 @@
 /* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_cmplx_mult_cmplx_f16.c
 * Description:  Floating-point complex-by-complex multiplication
 *
 * $Date:        18. March 2019
 * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
 * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "dsp/complex_math_functions_f16.h"
 #if defined(ARM_FLOAT16_SUPPORTED)
 /**
  @ingroup groupCmplxMath
 */
 /**
  @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
  Multiplies a complex vector by another complex vector and generates a complex result.
  The data in the complex arrays is stored in an interleaved fashion
  (real, imag, real, imag, ...).
  The parameter <code>numSamples</code> represents the number of complex
  samples processed.  The complex arrays have a total of <code>2*numSamples</code>
  real values.
  The following algorithm is used:
  <pre>
  for (n = 0; n < numSamples; n++) {
      pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
      pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
  }
  </pre>
  There are separate functions for floating-point, Q15, and Q31 data types.
 */
 /**
  @addtogroup CmplxByCmplxMult
  @{
 */
 /**
  @brief         Floating-point complex-by-complex multiplication.
  @param[in]     pSrcA       points to first input vector
  @param[in]     pSrcB       points to second input vector
  @param[out]    pDst        points to output vector
  @param[in]     numSamples  number of samples in each vector
  @return        none
 */
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 void arm_cmplx_mult_cmplx_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
        float16_t * pDst,
        uint32_t numSamples)
 {
    /*
     * MVE implementation of the element-wise complex product
     *   pDst[n] = pSrcA[n] * pSrcB[n],   n = 0 .. numSamples-1
     * where every complex sample is an interleaved (real, imag) pair.
     *
     * pSrcA, pSrcB : input vectors, 2*numSamples f16 values each
     * pDst         : output vector, 2*numSamples f16 values
     * numSamples   : number of complex samples
     */
    uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* total number of f16 values */
    uint32_t blkCnt;                                /* loop counter */
    float16_t a, b, c, d;  /* Temporary variables to store real and imaginary values */
    f16x8_t vecA;
    f16x8_t vecB;
    f16x8_t vecDst;

    /* Vector loop: each iteration consumes 8 f16 values per input */
    blkCnt = blockSizeC >> 3;
    while (blkCnt > 0U)
    {
        vecA = vldrhq_f16(pSrcA);
        vecB = vldrhq_f16(pSrcB);
        /*
         * vcmulq followed by vcmlaq_rot90 accumulates the two halves of
         * the complex product:
         *   C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]
         *   C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]
         */
        vecDst = vcmulq(vecA, vecB);
        vecDst = vcmlaq_rot90(vecDst, vecA, vecB);
        vstrhq_f16(pDst, vecDst);
        blkCnt--;
        pSrcA += 8;
        pSrcB += 8;
        pDst += 8;
    }

    /*
     * Tail: the f16 values left over by the vector loop, two per complex
     * sample. The count must be derived from the same f16-value total as
     * the vector loop; deriving it from the complex-sample count processes
     * the wrong number of samples (too few, or past the end of the buffers).
     */
    blkCnt = (blockSizeC & 7U) >> 1;
    while (blkCnt > 0U)
    {
      /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
      /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
      a = *pSrcA++;
      b = *pSrcA++;
      c = *pSrcB++;
      d = *pSrcB++;
      /* store result in destination buffer. */
      *pDst++ = (a * c) - (b * d);
      *pDst++ = (a * d) + (b * c);
      /* Decrement loop counter */
      blkCnt--;
    }
 }
 #else
 void arm_cmplx_mult_cmplx_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
        float16_t * pDst,
        uint32_t numSamples)
 {
    /*
     * Scalar implementation of the element-wise complex product.
     * With A = aRe + j*aIm and B = bRe + j*bIm taken from pSrcA / pSrcB:
     *   real part = aRe * bRe - aIm * bIm
     *   imag part = aRe * bIm + aIm * bRe
     * Inputs and output are interleaved (real, imag) pairs.
     */
    uint32_t remaining;                 /* Complex samples still to process */
    float16_t aRe, aIm, bRe, bIm;       /* Operands of the current product */
 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Unrolled section: four complex products per pass */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    aRe = pSrcA[0];
    aIm = pSrcA[1];
    bRe = pSrcB[0];
    bIm = pSrcB[1];
    pDst[0] = (aRe * bRe) - (aIm * bIm);
    pDst[1] = (aRe * bIm) + (aIm * bRe);
    aRe = pSrcA[2];
    aIm = pSrcA[3];
    bRe = pSrcB[2];
    bIm = pSrcB[3];
    pDst[2] = (aRe * bRe) - (aIm * bIm);
    pDst[3] = (aRe * bIm) + (aIm * bRe);
    aRe = pSrcA[4];
    aIm = pSrcA[5];
    bRe = pSrcB[4];
    bIm = pSrcB[5];
    pDst[4] = (aRe * bRe) - (aIm * bIm);
    pDst[5] = (aRe * bIm) + (aIm * bRe);
    aRe = pSrcA[6];
    aIm = pSrcA[7];
    bRe = pSrcB[6];
    bIm = pSrcB[7];
    pDst[6] = (aRe * bRe) - (aIm * bIm);
    pDst[7] = (aRe * bIm) + (aIm * bRe);
    /* Step over the four samples handled in this pass */
    pSrcA += 8;
    pSrcB += 8;
    pDst += 8;
  }
  /* Samples that did not fit in a group of four */
  remaining = numSamples % 0x4U;
 #else
  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;
 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  for (; remaining > 0U; remaining--)
  {
    aRe = *pSrcA++;
    aIm = *pSrcA++;
    bRe = *pSrcB++;
    bIm = *pSrcB++;
    /* Write the real part, then the imaginary part */
    *pDst++ = (aRe * bRe) - (aIm * bIm);
    *pDst++ = (aRe * bIm) + (aIm * bRe);
  }
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 /**
  @} end of CmplxByCmplxMult group
 */
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
@ -0,0 +1,192 @@
 /* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
 * Title:        arm_cmplx_mult_real_f16.c
 * Description:  Floating-point complex by real multiplication
 *
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
 * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "dsp/complex_math_functions_f16.h"
 #if defined(ARM_FLOAT16_SUPPORTED)
 /**
  @ingroup groupCmplxMath
 */
 /**
  @defgroup CmplxByRealMult Complex-by-Real Multiplication
  Multiplies a complex vector by a real vector and generates a complex result.
  The data in the complex arrays is stored in an interleaved fashion
  (real, imag, real, imag, ...).
  The parameter <code>numSamples</code> represents the number of complex
  samples processed.  The complex arrays have a total of <code>2*numSamples</code>
  real values while the real array has a total of <code>numSamples</code>
  real values.
  The following algorithm is used:
  <pre>
  for (n = 0; n < numSamples; n++) {
      pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
      pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
  }
  </pre>
  There are separate functions for floating-point, Q15, and Q31 data types.
 */
 /**
  @addtogroup CmplxByRealMult
  @{
 */
 /**
  @brief         Floating-point complex-by-real multiplication.
  @param[in]     pSrcCmplx   points to complex input vector
  @param[in]     pSrcReal    points to real input vector
  @param[out]    pCmplxDst   points to complex output vector
  @param[in]     numSamples  number of samples in each vector
  @return        none
 */
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
 {
    /*
     * MVE implementation of the complex-by-real product
     *   pCmplxDst[2n]   = pSrcCmplx[2n]   * pSrcReal[n]
     *   pCmplxDst[2n+1] = pSrcCmplx[2n+1] * pSrcReal[n]
     *
     * pSrcCmplx  : complex input, 2*numSamples interleaved f16 values
     * pSrcReal   : real input, numSamples f16 values
     * pCmplxDst  : complex output, 2*numSamples interleaved f16 values
     * numSamples : number of complex samples
     */
    /* Gather offsets that duplicate each real value over a (re, im) lane pair */
    static const uint16_t stride_cmplx_x_real_16[8] = {
        0, 0, 1, 1, 2, 2, 3, 3
        };
    uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* total number of f16 values */
    uint32_t blkCnt;                                /* loop counter */
    f16x8_t rVec;
    f16x8_t cmplxVec;
    f16x8_t dstVec;
    uint16x8_t strideVec;

    /* stride vector for pairs of real generation */
    strideVec = vld1q(stride_cmplx_x_real_16);

    /* Compute 4 complex outputs at a time */
    blkCnt = blockSizeC >> 3;
    while (blkCnt > 0U)
    {
        cmplxVec = vld1q(pSrcCmplx);
        rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec);
        dstVec = vmulq(cmplxVec, rVec);
        vst1q(pCmplxDst, dstVec);
        pSrcReal += 4;
        pSrcCmplx += 8;
        pCmplxDst += 8;
        blkCnt--;
    }

    /* Tail: remaining f16 values (fewer than a full vector) */
    blkCnt = blockSizeC & 7;
    if (blkCnt > 0U) {
        mve_pred16_t p0 = vctp16q(blkCnt);
        /*
         * Both loads are predicated with p0 (inactive lanes read as zero)
         * so that nothing is fetched beyond the end of the input buffers;
         * the store is predicated the same way.
         */
        cmplxVec = vldrhq_z_f16(pSrcCmplx, p0);
        rVec = vldrhq_gather_shifted_offset_z_f16(pSrcReal, strideVec, p0);
        dstVec = vmulq(cmplxVec, rVec);
        vstrhq_p_f16(pCmplxDst, dstVec, p0);
    }
 }
 #else
 void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
 {
        /*
         * Scalar implementation: both halves of every interleaved
         * (real, imag) pair from pSrcCmplx are multiplied by the matching
         * value of pSrcReal and written to pCmplxDst.
         */
        uint32_t remaining;                            /* Complex samples still to process */
        float16_t scale;                               /* Current real multiplier */
 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Unrolled section: four complex samples per pass */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* C[2 * i    ] = A[2 * i    ] * B[i]. */
    /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
    scale = pSrcReal[0];
    pCmplxDst[0] = pSrcCmplx[0] * scale;
    pCmplxDst[1] = pSrcCmplx[1] * scale;
    scale = pSrcReal[1];
    pCmplxDst[2] = pSrcCmplx[2] * scale;
    pCmplxDst[3] = pSrcCmplx[3] * scale;
    scale = pSrcReal[2];
    pCmplxDst[4] = pSrcCmplx[4] * scale;
    pCmplxDst[5] = pSrcCmplx[5] * scale;
    scale = pSrcReal[3];
    pCmplxDst[6] = pSrcCmplx[6] * scale;
    pCmplxDst[7] = pSrcCmplx[7] * scale;
    /* Step over the four samples handled in this pass */
    pSrcReal += 4;
    pSrcCmplx += 8;
    pCmplxDst += 8;
  }
  /* Samples that did not fit in a group of four */
  remaining = numSamples % 0x4U;
 #else
  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;
 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
  for (; remaining > 0U; remaining--)
  {
    scale = *pSrcReal++;
    /* Scale the real part, then the imaginary part */
    *pCmplxDst++ = *pSrcCmplx++ * scale;
    *pCmplxDst++ = *pSrcCmplx++ * scale;
  }
 }
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 /**
  @} end of CmplxByRealMult group
 */
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@ -327,6 +327,7 @@ set(TESTSRC
 if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL)))
 set(TESTSRC16 
  Source/Tests/BasicTestsF16.cpp
  Source/Tests/ComplexTestsF16.cpp
  Source/Tests/TransformCF16.cpp
  Source/Tests/TransformRF16.cpp
  )
--- a/Testing/Include/Tests/ComplexTestsF16.h
+++ b/Testing/Include/Tests/ComplexTestsF16.h
@ -0,0 +1,21 @@
 #include "Test.h"
 #include "Pattern.h"
 #include "dsp/complex_math_functions_f16.h"
 /*
  * Test suite for the f16 complex math functions declared in
  * dsp/complex_math_functions_f16.h. Derives from Client::Suite and
  * overrides its setUp / tearDown hooks.
  */
 class ComplexTestsF16:public Client::Suite
    {
        public:
            ComplexTestsF16(Testing::testID_t id);
            // Hooks taking the test ID and the pattern manager; setUp additionally
            // receives the test parameters.
            // NOTE(review): presumably run before / after each test - confirm in Client::Suite.
            virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr);
            virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
        private:
            // Pulled in inside the class body, so its content becomes private members.
            // NOTE(review): presumably the generated test method declarations - confirm.
            #include "ComplexTestsF16_decl.h"
            // Input patterns read by the tests
            Client::Pattern<float16_t> input1;
            Client::Pattern<float16_t> input2;
            // Buffer the tests write their results into
            Client::LocalPattern<float16_t> output;
            // Reference patterns are not loaded when we are in dump mode
            Client::RefPattern<float16_t> ref;
    };
--- a/Testing/PatternGeneration/ComplexMaths.py
+++ b/Testing/PatternGeneration/ComplexMaths.py
@ -105,11 +105,13 @@ def  generatePatterns():
     PARAMDIR = os.path.join("Parameters","DSP","ComplexMaths","ComplexMaths")
     configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32")
     configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16")
     configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31")
     configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15")
     writeTests(configf32,0)
     writeTests(configf16,16)
     writeTests(configq31,31)
     writeTests(configq15,15)
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input1_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input1_f16.txt
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input2_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input2_f16.txt
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input3_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input3_f16.txt
@ -0,0 +1,514 @@
 H
 256
 // 0.606399
 0x38da
 // 0.073125
 0x2cae
 // -0.140834
 0xb082
 // -0.900755
 0xbb35
 // 0.148560
 0x30c1
 // -0.261878
 0xb431
 // -0.545170
 0xb85d
 // -0.628326
 0xb907
 // 0.495511
 0x37ee
 // 0.063174
 0x2c0b
 // -0.169679
 0xb16e
 // 0.145953
 0x30ac
 // 0.496673
 0x37f2
 // 1.000000
 0x3c00
 // 0.131807
 0x3038
 // 0.036478
 0x28ab
 // 0.427452
 0x36d7
 // -0.526852
 0xb837
 // -0.153413
 0xb0e9
 // -0.180253
 0xb1c5
 // 0.192358
 0x3228
 // 0.534224
 0x3846
 // -0.143569
 0xb098
 // -0.378102
 0xb60d
 // -0.387182
 0xb632
 // -0.181926
 0xb1d2
 // 0.627082
 0x3904
 // -0.782546
 0xba43
 // 0.227872
 0x334b
 // -0.920057
 0xbb5c
 // 0.028790
 0x275f
 // -0.002111
 0x9853
 // -0.448033
 0xb72b
 // 0.259638
 0x3427
 // -0.284893
 0xb48f
 // -0.172468
 0xb185
 // 0.304282
 0x34de
 // 0.130491
 0x302d
 // -0.206776
 0xb29e
 // 0.384068
 0x3625
 // 0.208473
 0x32ac
 // -0.097562
 0xae3e
 // 0.121821
 0x2fcc
 // -0.611990
 0xb8e5
 // -0.062335
 0xabfb
 // 0.614710
 0x38eb
 // 0.438926
 0x3706
 // 0.195912
 0x3245
 // 0.081082
 0x2d30
 // 0.012720
 0x2283
 // 0.258657
 0x3423
 // -0.268801
 0xb44d
 // -0.183050
 0xb1dc
 // 0.087615
 0x2d9b
 // -0.427434
 0xb6d7
 // 0.301905
 0x34d5
 // -0.236141
 0xb38e
 // 0.188230
 0x3206
 // -0.089373
 0xadb8
 // -0.294443
 0xb4b6
 // 0.506253
 0x380d
 // 0.841140
 0x3abb
 // 0.104046
 0x2ea9
 // 0.285712
 0x3492
 // -0.725796
 0xb9ce
 // 0.649891
 0x3933
 // 0.663543
 0x394f
 // -0.062934
 0xac07
 // -0.387710
 0xb634
 // 0.531890
 0x3841
 // 0.277675
 0x3471
 // 0.319026
 0x351b
 // 0.072518
 0x2ca4
 // 0.637432
 0x3919
 // -0.316837
 0xb512
 // 0.031885
 0x2815
 // -0.109017
 0xaefa
 // -0.531561
 0xb841
 // -0.116513
 0xaf75
 // 0.005209
 0x1d56
 // -0.178215
 0xb1b4
 // 0.096452
 0x2e2c
 // -0.089155
 0xadb5
 // -0.193966
 0xb235
 // -0.318435
 0xb518
 // -0.137340
 0xb065
 // 0.325371
 0x3535
 // -0.413025
 0xb69c
 // -0.007530
 0x9fb6
 // -0.011499
 0xa1e3
 // -0.359144
 0xb5bf
 // -0.317911
 0xb516
 // 0.018568
 0x24c1
 // -0.366742
 0xb5de
 // 0.221775
 0x3319
 // 0.420017
 0x36b8
 // -0.040939
 0xa93d
 // -0.362740
 0xb5ce
 // 0.471219
 0x378a
 // -0.099621
 0xae60
 // 0.326052
 0x3538
 // 0.311047
 0x34fa
 // 0.156421
 0x3101
 // -0.068640
 0xac65
 // -0.095922
 0xae24
 // -0.013959
 0xa326
 // 0.607825
 0x38dd
 // 0.093427
 0x2dfb
 // 0.329927
 0x3547
 // 0.084851
 0x2d6e
 // 0.726006
 0x39cf
 // 0.248801
 0x33f6
 // 0.620044
 0x38f6
 // 0.164699
 0x3145
 // 0.045769
 0x29dc
 // 0.264445
 0x343b
 // -0.238087
 0xb39e
 // 0.220883
 0x3311
 // 0.018551
 0x24c0
 // -0.057134
 0xab50
 // -0.155357
 0xb0f9
 // 0.038525
 0x28ee
 // 0.208492
 0x32ac
 // -0.098819
 0xae53
 // 0.027140
 0x26f3
 // -0.333135
 0xb555
 // -0.423594
 0xb6c7
 // 0.521231
 0x382b
 // 0.406043
 0x367f
 // -0.457325
 0xb751
 // 0.089960
 0x2dc2
 // -0.107212
 0xaedd
 // 0.089652
 0x2dbd
 // -0.269460
 0xb450
 // 0.155036
 0x30f6
 // 0.024048
 0x2628
 // 0.220735
 0x3310
 // 0.032031
 0x281a
 // -0.567049
 0xb889
 // 0.145897
 0x30ab
 // -0.094783
 0xae11
 // 0.319032
 0x351b
 // -0.091891
 0xade2
 // 0.416962
 0x36ac
 // 0.093970
 0x2e04
 // 0.564895
 0x3885
 // -0.296964
 0xb4c0
 // -0.209322
 0xb2b3
 // 0.265009
 0x343d
 // 0.093215
 0x2df7
 // 0.622832
 0x38fc
 // -0.085788
 0xad7e
 // 0.670554
 0x395d
 // 0.032468
 0x2828
 // 0.118023
 0x2f8e
 // -0.269207
 0xb44f
 // 0.217617
 0x32f7
 // 0.213691
 0x32d7
 // 0.439040
 0x3706
 // 0.241885
 0x33be
 // -0.424515
 0xb6cb
 // 0.352380
 0x35a3
 // 0.588583
 0x38b5
 // -0.264797
 0xb43d
 // 0.329184
 0x3544
 // 0.034001
 0x285a
 // -0.423064
 0xb6c5
 // -0.608316
 0xb8de
 // -0.338928
 0xb56c
 // 0.419995
 0x36b8
 // 0.200555
 0x326b
 // 0.329638
 0x3546
 // -0.294240
 0xb4b5
 // -0.897858
 0xbb2f
 // 0.160219
 0x3121
 // 0.131756
 0x3037
 // 0.206411
 0x329b
 // 0.109237
 0x2efe
 // -0.367268
 0xb5e0
 // 0.292430
 0x34ae
 // -0.414400
 0xb6a1
 // -0.642448
 0xb924
 // 0.238399
 0x33a1
 // 0.090387
 0x2dc9
 // -0.512754
 0xb81a
 // 0.301373
 0x34d2
 // -0.466867
 0xb778
 // 0.204287
 0x328a
 // -0.229499
 0xb358
 // -0.119896
 0xafac
 // 0.440248
 0x370b
 // 0.649995
 0x3933
 // 0.129477
 0x3025
 // 0.241037
 0x33b7
 // -0.411964
 0xb697
 // 0.228133
 0x334d
 // 0.942283
 0x3b8a
 // -0.390976
 0xb641
 // 0.182779
 0x31d9
 // 0.228995
 0x3354
 // 0.126382
 0x300b
 // 0.225140
 0x3334
 // -0.214251
 0xb2db
 // 0.439711
 0x3709
 // -0.638072
 0xb91b
 // -0.667301
 0xb957
 // -0.353387
 0xb5a7
 // 0.329438
 0x3545
 // -0.543036
 0xb858
 // -0.195706
 0xb243
 // -0.000314
 0x8d26
 // -0.346311
 0xb58a
 // -0.040030
 0xa920
 // 0.309919
 0x34f5
 // 0.214685
 0x32df
 // -0.256227
 0xb41a
 // 0.256241
 0x341a
 // 0.423187
 0x36c5
 // -0.070894
 0xac8a
 // -0.408192
 0xb688
 // 0.258732
 0x3424
 // 0.743039
 0x39f2
 // -0.328534
 0xb542
 // -0.502412
 0xb805
 // -0.550943
 0xb868
 // 0.461636
 0x3763
 // -0.098335
 0xae4b
 // -0.331961
 0xb550
 // 0.502005
 0x3804
 // -0.060550
 0xabc0
 // -0.218616
 0xb2ff
 // 0.206607
 0x329d
 // 0.509390
 0x3813
 // 0.331278
 0x354d
 // -0.143708
 0xb099
 // 0.008236
 0x2038
 // -0.256486
 0xb41b
 // -0.154828
 0xb0f4
 // -0.606731
 0xb8db
 // 0.043363
 0x298d
 // 0.416313
 0x36a9
 // 0.132691
 0x303f
 // 0.716789
 0x39bc
 // 0.827380
 0x3a9e
 // 0.109746
 0x2f06
 // 0.480993
 0x37b2
 // -0.424777
 0xb6cc
 // -0.169704
 0xb16e
 // -0.095902
 0xae23
 // 0.022081
 0x25a7
 // -0.227175
 0xb345
 // 0.382023
 0x361d
 // 0.316215
 0x350f
 // -0.027787
 0xa71d
 // 0.107868
 0x2ee7
 // -0.091834
 0xade1
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference1_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference1_f16.txt
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference2_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference2_f16.txt
@ -0,0 +1,6 @@
 H
 2
 // -0.584459
 0xb8ad
 // 0.027514
 0x270b
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference3_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference3_f16.txt
@ -0,0 +1,6 @@
 H
 2
 // -1.063155
 0xbc41
 // 0.204536
 0x328c
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference4_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference4_f16.txt
@ -0,0 +1,6 @@
 H
 2
 // -2.020148
 0xc00a
 // -0.083691
 0xad5b
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference5_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference5_f16.txt
@ -0,0 +1,514 @@
 H
 256
 // 0.211657
 0x32c6
 // 0.164180
 0x3141
 // 0.394092
 0x364e
 // 0.837031
 0x3ab2
 // 0.203042
 0x327f
 // 0.594290
 0x38c1
 // 0.425438
 0x36cf
 // 0.692208
 0x398a
 // 0.478904
 0x37aa
 // 0.731179
 0x39d9
 // 0.228972
 0x3354
 // 0.423292
 0x36c6
 // 0.687425
 0x3980
 // 0.637676
 0x391a
 // 0.400567
 0x3669
 // 0.284900
 0x348f
 // 0.800826
 0x3a68
 // 0.530918
 0x383f
 // 0.899030
 0x3b31
 // 0.067538
 0x2c53
 // 0.418694
 0x36b3
 // 0.422016
 0x36c1
 // 0.600117
 0x38cd
 // 0.096654
 0x2e30
 // 0.382849
 0x3620
 // 0.553403
 0x386d
 // 0.109789
 0x2f07
 // 0.740600
 0x39ed
 // 0.830944
 0x3aa6
 // 0.426594
 0x36d3
 // 0.281352
 0x3480
 // 0.270642
 0x3455
 // 0.354207
 0x35ab
 // 1.046783
 0x3c30
 // 0.937881
 0x3b81
 // 0.476101
 0x379e
 // 0.369946
 0x35eb
 // 0.334837
 0x355b
 // 0.319491
 0x351d
 // 0.119662
 0x2fa9
 // 0.664848
 0x3952
 // 0.641369
 0x3922
 // 0.414986
 0x36a4
 // 0.824391
 0x3a98
 // 0.404576
 0x3679
 // 0.283363
 0x3489
 // 0.467747
 0x377c
 // 0.467828
 0x377c
 // 0.140330
 0x307e
 // 0.402280
 0x3670
 // 1.003391
 0x3c03
 // 0.144475
 0x30a0
 // 0.461048
 0x3760
 // 0.468613
 0x377f
 // 0.609266
 0x38e0
 // 0.560596
 0x387c
 // 0.410914
 0x3693
 // 0.196930
 0x324d
 // 0.256623
 0x341b
 // 0.435163
 0x36f6
 // 0.324492
 0x3531
 // 0.371618
 0x35f2
 // 0.218667
 0x32ff
 // 0.324592
 0x3532
 // 0.095539
 0x2e1d
 // 0.268452
 0x344c
 // 0.399025
 0x3662
 // 0.410560
 0x3692
 // 0.535278
 0x3848
 // 0.300637
 0x34cf
 // 0.413966
 0x36a0
 // 0.263911
 0x3439
 // 0.227363
 0x3347
 // 0.245900
 0x33de
 // 0.444928
 0x371e
 // 0.486777
 0x37ca
 // 0.388128
 0x3636
 // 0.303929
 0x34dd
 // 0.320661
 0x3521
 // 0.225175
 0x3335
 // 0.817364
 0x3a8a
 // 0.479373
 0x37ac
 // 0.857010
 0x3adb
 // 0.356348
 0x35b4
 // 0.134285
 0x304c
 // 0.324278
 0x3530
 // 0.568790
 0x388d
 // 0.302070
 0x34d5
 // 0.224479
 0x332f
 // 0.394179
 0x364f
 // 0.577446
 0x389f
 // 0.250274
 0x3401
 // 0.531677
 0x3841
 // 0.424735
 0x36cc
 // 0.653466
 0x393a
 // 0.316197
 0x350f
 // 0.227789
 0x334a
 // 0.184250
 0x31e5
 // 0.090642
 0x2dcd
 // 0.462727
 0x3767
 // 0.344118
 0x3582
 // 0.245310
 0x33da
 // 0.100303
 0x2e6b
 // 0.447675
 0x372a
 // 0.691568
 0x3988
 // 0.363492
 0x35d1
 // 0.459596
 0x375b
 // 0.559206
 0x3879
 // 0.119232
 0x2fa1
 // 0.691377
 0x3988
 // 0.423896
 0x36c8
 // 0.092387
 0x2dea
 // 0.016503
 0x243a
 // 0.389855
 0x363d
 // 0.310975
 0x34fa
 // 0.508563
 0x3812
 // 0.446708
 0x3726
 // 0.292022
 0x34ac
 // 0.280445
 0x347d
 // 0.371688
 0x35f2
 // 0.231185
 0x3366
 // 0.468586
 0x377f
 // 0.037421
 0x28ca
 // 0.483864
 0x37be
 // 0.434319
 0x36f3
 // 0.164206
 0x3141
 // 0.584744
 0x38ae
 // 0.264999
 0x343d
 // 0.371643
 0x35f2
 // 0.662050
 0x394c
 // 0.566702
 0x3889
 // 0.496748
 0x37f3
 // 0.529369
 0x383c
 // 0.397883
 0x365e
 // 0.503458
 0x3807
 // 0.154150
 0x30ef
 // 0.435808
 0x36f9
 // 0.939636
 0x3b84
 // 0.347287
 0x358e
 // 0.202447
 0x327a
 // 0.834662
 0x3aad
 // 0.522830
 0x382f
 // 0.539743
 0x3851
 // 0.390797
 0x3641
 // 0.269937
 0x3452
 // 0.528511
 0x383a
 // 0.142120
 0x308c
 // 0.507329
 0x380f
 // 0.206810
 0x329e
 // 0.793935
 0x3a5a
 // 0.835657
 0x3aaf
 // 0.670280
 0x395d
 // 0.113984
 0x2f4c
 // 0.472458
 0x378f
 // 0.575082
 0x389a
 // 0.732740
 0x39dd
 // 0.591795
 0x38bc
 // 0.546775
 0x3860
 // 0.296527
 0x34bf
 // 0.261418
 0x342f
 // 0.571263
 0x3892
 // 0.589192
 0x38b7
 // 0.822382
 0x3a94
 // 0.361338
 0x35c8
 // 0.153735
 0x30eb
 // 0.466681
 0x3778
 // 0.393489
 0x364c
 // 0.721908
 0x39c6
 // 0.086062
 0x2d82
 // 0.331373
 0x354d
 // 0.621595
 0x38f9
 // 0.516218
 0x3821
 // 0.771446
 0x3a2c
 // 0.420211
 0x36b9
 // 0.422246
 0x36c2
 // 0.338301
 0x356a
 // 0.745863
 0x39f8
 // 0.650429
 0x3934
 // 0.550667
 0x3868
 // 0.412579
 0x369a
 // 0.288576
 0x349e
 // 0.353537
 0x35a8
 // 0.225252
 0x3335
 // 0.194284
 0x3238
 // 0.547930
 0x3862
 // 0.300970
 0x34d1
 // 0.504146
 0x3808
 // 1.026896
 0x3c1c
 // 0.418343
 0x36b2
 // 0.627205
 0x3905
 // 0.230780
 0x3363
 // 0.145178
 0x30a5
 // 0.384564
 0x3627
 // 0.278455
 0x3475
 // 0.341747
 0x3578
 // 0.448953
 0x372f
 // 0.233506
 0x3379
 // 0.229044
 0x3354
 // 0.261151
 0x342e
 // 0.582315
 0x38a9
 // 0.214435
 0x32dd
 // 0.195282
 0x3240
 // 0.422011
 0x36c1
 // 1.204601
 0x3cd2
 // 0.681809
 0x3974
 // 0.108645
 0x2ef4
 // 0.054477
 0x2af9
 // 0.269794
 0x3451
 // 0.138462
 0x306e
 // 0.381523
 0x361b
 // 0.225135
 0x3334
 // 0.204132
 0x3288
 // 0.285641
 0x3492
 // 0.521935
 0x382d
 // 0.670786
 0x395e
 // 0.601863
 0x38d1
 // 0.117244
 0x2f81
 // 0.804322
 0x3a6f
 // 0.522114
 0x382d
 // 0.456285
 0x374d
 // 0.325889
 0x3537
 // 0.052211
 0x2aaf
 // 0.471914
 0x378d
 // 0.620330
 0x38f6
 // 0.183729
 0x31e1
 // 0.612658
 0x38e7
 // 0.364133
 0x35d3
 // 0.520880
 0x382b
 // 0.693829
 0x398d
 // 0.146768
 0x30b2
 // 0.350727
 0x359d
 // 0.572416
 0x3894
 // 0.145950
 0x30ac
 // 0.659216
 0x3946
 // 0.232441
 0x3370
 // 0.365908
 0x35db
 // 0.533640
 0x3845
 // 0.578456
 0x38a1
 // 0.166925
 0x3157
 // 0.439703
 0x3709
 // 0.427486
 0x36d7
 // 0.461784
 0x3763
 // 0.292177
 0x34ad
 // 0.461312
 0x3762
 // 0.543533
 0x3859
 // 0.435610
 0x36f8
 // 0.237356
 0x3398
 // 0.366056
 0x35db
 // 0.196488
 0x324a
 // 0.022935
 0x25df
 // 0.359284
 0x35c0
 // 0.409479
 0x368d
 // 0.228371
 0x334f
 // 0.124286
 0x2ff4
 // 0.434761
 0x36f5
 // 0.629995
 0x390a
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference6_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference6_f16.txt
@ -0,0 +1,514 @@
 H
 256
 // 0.044799
 0x29bc
 // 0.026955
 0x26e7
 // 0.155308
 0x30f8
 // 0.700622
 0x399b
 // 0.041226
 0x2947
 // 0.353181
 0x35a7
 // 0.180997
 0x31cb
 // 0.479153
 0x37ab
 // 0.229349
 0x3357
 // 0.534623
 0x3847
 // 0.052428
 0x2ab6
 // 0.179176
 0x31bc
 // 0.472553
 0x3790
 // 0.406630
 0x3682
 // 0.160454
 0x3122
 // 0.081168
 0x2d32
 // 0.641322
 0x3921
 // 0.281874
 0x3483
 // 0.808254
 0x3a77
 // 0.004561
 0x1cac
 // 0.175304
 0x319c
 // 0.178098
 0x31b3
 // 0.360140
 0x35c3
 // 0.009342
 0x20c8
 // 0.146573
 0x30b1
 // 0.306255
 0x34e6
 // 0.012054
 0x222c
 // 0.548488
 0x3863
 // 0.690467
 0x3986
 // 0.181983
 0x31d3
 // 0.079159
 0x2d11
 // 0.073247
 0x2cb0
 // 0.125462
 0x3004
 // 1.095754
 0x3c62
 // 0.879620
 0x3b09
 // 0.226672
 0x3341
 // 0.136860
 0x3061
 // 0.112116
 0x2f2d
 // 0.102075
 0x2e88
 // 0.014319
 0x2355
 // 0.442023
 0x3713
 // 0.411355
 0x3695
 // 0.172214
 0x3183
 // 0.679620
 0x3970
 // 0.163681
 0x313d
 // 0.080295
 0x2d24
 // 0.218787
 0x3300
 // 0.218863
 0x3301
 // 0.019693
 0x250b
 // 0.161829
 0x312e
 // 1.006794
 0x3c07
 // 0.020873
 0x2558
 // 0.212565
 0x32cd
 // 0.219598
 0x3307
 // 0.371205
 0x35f0
 // 0.314268
 0x3507
 // 0.168850
 0x3167
 // 0.038782
 0x28f7
 // 0.065855
 0x2c37
 // 0.189367
 0x320f
 // 0.105295
 0x2ebd
 // 0.138100
 0x306b
 // 0.047815
 0x2a1f
 // 0.105360
 0x2ebe
 // 0.009128
 0x20ac
 // 0.072067
 0x2c9d
 // 0.159221
 0x3118
 // 0.168559
 0x3165
 // 0.286522
 0x3496
 // 0.090382
 0x2dc9
 // 0.171368
 0x317c
 // 0.069649
 0x2c75
 // 0.051694
 0x2a9e
 // 0.060467
 0x2bbd
 // 0.197961
 0x3256
 // 0.236952
 0x3395
 // 0.150644
 0x30d2
 // 0.092373
 0x2de9
 // 0.102823
 0x2e95
 // 0.050704
 0x2a7d
 // 0.668084
 0x3958
 // 0.229798
 0x335b
 // 0.734466
 0x39e0
 // 0.126984
 0x3010
 // 0.018033
 0x249e
 // 0.105157
 0x2ebb
 // 0.323522
 0x352d
 // 0.091246
 0x2dd7
 // 0.050391
 0x2a73
 // 0.155377
 0x30f9
 // 0.333444
 0x3556
 // 0.062637
 0x2c02
 // 0.282681
 0x3486
 // 0.180400
 0x31c6
 // 0.427018
 0x36d5
 // 0.099980
 0x2e66
 // 0.051888
 0x2aa4
 // 0.033948
 0x2858
 // 0.008216
 0x2035
 // 0.214116
 0x32da
 // 0.118417
 0x2f94
 // 0.060177
 0x2bb4
 // 0.010061
 0x2127
 // 0.200412
 0x326a
 // 0.478267
 0x37a7
 // 0.132127
 0x303a
 // 0.211228
 0x32c2
 // 0.312711
 0x3501
 // 0.014216
 0x2347
 // 0.478003
 0x37a6
 // 0.179688
 0x31c0
 // 0.008535
 0x205f
 // 0.000272
 0xc76
 // 0.151987
 0x30dd
 // 0.096705
 0x2e30
 // 0.258636
 0x3423
 // 0.199548
 0x3263
 // 0.085277
 0x2d75
 // 0.078650
 0x2d09
 // 0.138152
 0x306c
 // 0.053447
 0x2ad7
 // 0.219573
 0x3307
 // 0.001400
 0x15bc
 // 0.234124
 0x337e
 // 0.188633
 0x3209
 // 0.026963
 0x26e7
 // 0.341926
 0x3579
 // 0.070225
 0x2c7f
 // 0.138118
 0x306b
 // 0.438310
 0x3703
 // 0.321151
 0x3523
 // 0.246759
 0x33e5
 // 0.280232
 0x347c
 // 0.158311
 0x3111
 // 0.253470
 0x340e
 // 0.023762
 0x2615
 // 0.189928
 0x3214
 // 0.882917
 0x3b10
 // 0.120608
 0x2fb8
 // 0.040985
 0x293f
 // 0.696661
 0x3993
 // 0.273351
 0x3460
 // 0.291322
 0x34a9
 // 0.152722
 0x30e3
 // 0.072866
 0x2caa
 // 0.279324
 0x3478
 // 0.020198
 0x252c
 // 0.257383
 0x341e
 // 0.042770
 0x2979
 // 0.630333
 0x390b
 // 0.698322
 0x3996
 // 0.449275
 0x3730
 // 0.012992
 0x22a7
 // 0.223217
 0x3325
 // 0.330719
 0x354b
 // 0.536907
 0x384c
 // 0.350221
 0x359b
 // 0.298963
 0x34c9
 // 0.087928
 0x2da1
 // 0.068340
 0x2c60
 // 0.326342
 0x3539
 // 0.347147
 0x358e
 // 0.676312
 0x3969
 // 0.130565
 0x302e
 // 0.023634
 0x260d
 // 0.217791
 0x32f8
 // 0.154834
 0x30f4
 // 0.521151
 0x382b
 // 0.007407
 0x1f96
 // 0.109808
 0x2f07
 // 0.386380
 0x362f
 // 0.266481
 0x3444
 // 0.595129
 0x38c3
 // 0.176577
 0x31a7
 // 0.178291
 0x31b5
 // 0.114447
 0x2f53
 // 0.556312
 0x3873
 // 0.423058
 0x36c5
 // 0.303234
 0x34da
 // 0.170221
 0x3172
 // 0.083276
 0x2d54
 // 0.124988
 0x3000
 // 0.050738
 0x2a7f
 // 0.037746
 0x28d5
 // 0.300227
 0x34ce
 // 0.090583
 0x2dcc
 // 0.254164
 0x3411
 // 1.054516
 0x3c38
 // 0.175011
 0x319a
 // 0.393386
 0x364b
 // 0.053260
 0x2ad1
 // 0.021077
 0x2565
 // 0.147889
 0x30bc
 // 0.077537
 0x2cf6
 // 0.116791
 0x2f7a
 // 0.201558
 0x3273
 // 0.054525
 0x2afb
 // 0.052461
 0x2ab7
 // 0.068200
 0x2c5d
 // 0.339091
 0x356d
 // 0.045982
 0x29e3
 // 0.038135
 0x28e2
 // 0.178093
 0x31b3
 // 1.451062
 0x3dce
 // 0.464864
 0x3770
 // 0.011804
 0x220b
 // 0.002968
 0x1a14
 // 0.072789
 0x2ca9
 // 0.019172
 0x24e8
 // 0.145560
 0x30a8
 // 0.050686
 0x2a7d
 // 0.041670
 0x2955
 // 0.081591
 0x2d39
 // 0.272416
 0x345c
 // 0.449953
 0x3733
 // 0.362239
 0x35cc
 // 0.013746
 0x230a
 // 0.646934
 0x392d
 // 0.272603
 0x345d
 // 0.208196
 0x32aa
 // 0.106203
 0x2ecc
 // 0.002726
 0x1995
 // 0.222703
 0x3320
 // 0.384809
 0x3628
 // 0.033756
 0x2852
 // 0.375349
 0x3601
 // 0.132593
 0x303e
 // 0.271316
 0x3457
 // 0.481399
 0x37b4
 // 0.021541
 0x2584
 // 0.123010
 0x2fdf
 // 0.327660
 0x353e
 // 0.021302
 0x2574
 // 0.434565
 0x36f4
 // 0.054029
 0x2aea
 // 0.133889
 0x3049
 // 0.284771
 0x348e
 // 0.334612
 0x355b
 // 0.027864
 0x2722
 // 0.193338
 0x3230
 // 0.182744
 0x31d9
 // 0.213245
 0x32d3
 // 0.085367
 0x2d77
 // 0.212809
 0x32cf
 // 0.295428
 0x34ba
 // 0.189756
 0x3212
 // 0.056338
 0x2b36
 // 0.133997
 0x304a
 // 0.038607
 0x28f1
 // 0.000526
 0x104f
 // 0.129085
 0x3021
 // 0.167673
 0x315e
 // 0.052153
 0x2aad
 // 0.015447
 0x23e9
 // 0.189017
 0x320c
 // 0.396894
 0x365a
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference7_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference7_f16.txt
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference8_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference8_f16.txt
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference9_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference9_f16.txt
@ -0,0 +1,6 @@
 H
 2
 // -0.368091
 0xb5e4
 // -0.861249
 0xbae4
--- a/Testing/Source/Tests/ComplexTestsF16.cpp
+++ b/Testing/Source/Tests/ComplexTestsF16.cpp
@ -0,0 +1,308 @@
 #include "ComplexTestsF16.h"
 #include <stdio.h>
 #include "Error.h"
 /* Threshold handed to ASSERT_SNR (cast to float16_t) by the test_* methods below.
    NOTE(review): presumably a signal-to-noise ratio in dB - confirm against the ASSERT_SNR definition. */
 #define SNR_THRESHOLD 40
 /* Tolerance handed to ASSERT_REL_ERROR by the test_* methods below. */
 #define REL_ERROR (6.0e-2)
    void ComplexTestsF16::test_cmplx_conj_f16()
    {
        const float16_t *inp1=input1.ptr();
        float16_t *outp=output.ptr();
        arm_cmplx_conj_f16(inp1,outp,input1.nbSamples() >> 1 );
        ASSERT_EMPTY_TAIL(output);
        ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
        ASSERT_REL_ERROR(output,ref,REL_ERROR);
    } 
    void ComplexTestsF16::test_cmplx_dot_prod_f16()
    {
        float16_t re,im;
        const float16_t *inp1=input1.ptr();
        const float16_t *inp2=input2.ptr();
        float16_t *outp=output.ptr();
        arm_cmplx_dot_prod_f16(inp1,inp2,input1.nbSamples() >> 1,&re,&im);
        outp[0] = re;
        outp[1] = im;
        ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
        ASSERT_REL_ERROR(output,ref,REL_ERROR);
        ASSERT_EMPTY_TAIL(output);
    } 
    void ComplexTestsF16::test_cmplx_mag_f16()
    {
        const float16_t *inp1=input1.ptr();
        float16_t *outp=output.ptr();
        arm_cmplx_mag_f16(inp1,outp,input1.nbSamples() >> 1 );
        ASSERT_EMPTY_TAIL(output);
        ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
        ASSERT_REL_ERROR(output,ref,REL_ERROR);
    } 
    void ComplexTestsF16::test_cmplx_mag_squared_f16()
    {
        const float16_t *inp1=input1.ptr();
        float16_t *outp=output.ptr();
        arm_cmplx_mag_squared_f16(inp1,outp,input1.nbSamples() >> 1 );
        ASSERT_EMPTY_TAIL(output);
        ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
        ASSERT_REL_ERROR(output,ref,REL_ERROR);
    } 
    void ComplexTestsF16::test_cmplx_mult_cmplx_f16()
    {
        const float16_t *inp1=input1.ptr();
        const float16_t *inp2=input2.ptr();
        float16_t *outp=output.ptr();
        arm_cmplx_mult_cmplx_f16(inp1,inp2,outp,input1.nbSamples() >> 1 );
        ASSERT_EMPTY_TAIL(output);
        ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
        ASSERT_REL_ERROR(output,ref,REL_ERROR);
    } 
    void ComplexTestsF16::test_cmplx_mult_real_f16()
    {
        const float16_t *inp1=input1.ptr();
        const float16_t *inp2=input2.ptr();
        float16_t *outp=output.ptr();
        arm_cmplx_mult_real_f16(inp1,inp2,outp,input1.nbSamples() >> 1 );
        ASSERT_EMPTY_TAIL(output);
        ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
        ASSERT_REL_ERROR(output,ref,REL_ERROR);
    } 
    /*
     * Loads the patterns required by test `id` and creates the output buffer.
     *
     * nb is the element count of the test: 7, 16 or 23 for the short variants,
     * MAX_NB_SAMPLES for every other id. Patterns reloaded with (nb << 1)
     * request twice that many samples. The output is always created with the
     * sample count of the reference pattern. `params` is not used, and an id
     * that matches no case of the second switch leaves all patterns untouched.
     */
    void ComplexTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
    {
        (void)params;

        /* First pass: choose the element count for this test. */
        Testing::nbSamples_t nb=MAX_NB_SAMPLES;

        switch(id)
        {
        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_1:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_4:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_7:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_10:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_13:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_16:
            nb = 7;
            break;

        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_2:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_5:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_8:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_11:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_14:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_17:
            nb = 16;
            break;

        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_3:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_6:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_9:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_12:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_15:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_18:
            nb = 23;
            break;

        default:
            /* Tests 19 to 24 (and any unlisted id) keep MAX_NB_SAMPLES. */
            break;
        }

        /*
         * Second pass: load reference and inputs per kernel, always in the
         * order reference, input1, (input2), output.
         */
        switch(id)
        {
        /* arm_cmplx_conj_f16 */
        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_1:
        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_2:
        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_3:
        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_19:
            ref.reload(ComplexTestsF16::REF_CONJ_F16_ID,mgr,nb << 1);
            input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
            output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
            break;

        /* arm_cmplx_dot_prod_f16 */
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_4:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_5:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_6:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_20:
            /* Each length has a dedicated reference, reloaded without a sample count. */
            switch(id)
            {
            case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_4:
                ref.reload(ComplexTestsF16::REF_DOT_PROD_3_F16_ID,mgr);
                break;
            case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_5:
                ref.reload(ComplexTestsF16::REF_DOT_PROD_4N_F16_ID,mgr);
                break;
            case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_6:
                ref.reload(ComplexTestsF16::REF_DOT_PROD_4N1_F16_ID,mgr);
                break;
            default:
                /* Only TEST_CMPLX_DOT_PROD_F16_20 can reach this branch. */
                ref.reload(ComplexTestsF16::REF_DOT_PROD_LONG_F16_ID,mgr);
                break;
            }
            input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
            input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
            output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
            break;

        /* arm_cmplx_mag_f16: the reference is reloaded with nb samples */
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_7:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_8:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_9:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_21:
            ref.reload(ComplexTestsF16::REF_MAG_F16_ID,mgr,nb);
            input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
            output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
            break;

        /* arm_cmplx_mag_squared_f16: the reference is reloaded with nb samples */
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_10:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_11:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_12:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_22:
            ref.reload(ComplexTestsF16::REF_MAG_SQUARED_F16_ID,mgr,nb);
            input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
            output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
            break;

        /* arm_cmplx_mult_cmplx_f16 */
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_13:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_14:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_15:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_23:
            ref.reload(ComplexTestsF16::REF_CMPLX_MULT_CMPLX_F16_ID,mgr,nb << 1);
            input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
            input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
            output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
            break;

        /* arm_cmplx_mult_real_f16: the second input is INPUT3, reloaded with nb samples */
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_16:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_17:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_18:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_24:
            ref.reload(ComplexTestsF16::REF_CMPLX_MULT_REAL_F16_ID,mgr,nb << 1);
            input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
            input2.reload(ComplexTestsF16::INPUT3_F16_ID,mgr,nb);
            output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
            break;

        default:
            /* Unlisted id: nothing is loaded or created. */
            break;
        }
    }
    /*
     * Teardown hook: dumps the output pattern through the pattern manager.
     * The test id is not needed here.
     */
    void ComplexTestsF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
    {
        output.dump(mgr);

        (void)id;
    }
--- a/Testing/desc_f16.txt
+++ b/Testing/desc_f16.txt
@ -77,6 +77,69 @@ group Root {
        }
        group Complex Tests {
           class = ComplexTests
           folder = ComplexMaths
           suite Complex Tests F16{
              class = ComplexTestsF16
              folder = ComplexMathsF16
              Pattern INPUT1_F16_ID : Input1_f16.txt 
              Pattern INPUT2_F16_ID : Input2_f16.txt 
              Pattern INPUT3_F16_ID : Input3_f16.txt 
              Pattern REF_CONJ_F16_ID : Reference1_f16.txt
              Pattern REF_DOT_PROD_3_F16_ID : Reference2_f16.txt
              Pattern REF_DOT_PROD_4N_F16_ID : Reference3_f16.txt
              Pattern REF_DOT_PROD_4N1_F16_ID : Reference4_f16.txt
              Pattern REF_MAG_F16_ID : Reference5_f16.txt
              Pattern REF_MAG_SQUARED_F16_ID : Reference6_f16.txt
              Pattern REF_CMPLX_MULT_CMPLX_F16_ID : Reference7_f16.txt
              Pattern REF_CMPLX_MULT_REAL_F16_ID : Reference8_f16.txt
              Pattern REF_DOT_PROD_LONG_F16_ID : Reference9_f16.txt
              Output  OUT_SAMPLES_F16_ID : Output
              Output  OUT_STATE_F16_ID : State
              Functions {
                Test nb=3    arm_cmplx_conj_f16:test_cmplx_conj_f16
                Test nb=4n   arm_cmplx_conj_f16:test_cmplx_conj_f16
                Test nb=4n+1 arm_cmplx_conj_f16:test_cmplx_conj_f16
                Test nb=3    arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
                Test nb=4n   arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
                Test nb=4n+1 arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
                Test nb=3    arm_cmplx_mag_f16:test_cmplx_mag_f16
                Test nb=4n   arm_cmplx_mag_f16:test_cmplx_mag_f16
                Test nb=4n+1 arm_cmplx_mag_f16:test_cmplx_mag_f16
                Test nb=3    arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
                Test nb=4n   arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
                Test nb=4n+1 arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
                Test nb=3    arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
                Test nb=4n   arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
                Test nb=4n+1 arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
                Test nb=3    arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
                Test nb=4n   arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
                Test nb=4n+1 arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
                Test long    arm_cmplx_conj_f16:test_cmplx_conj_f16
                Test long    arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
                Test long    arm_cmplx_mag_f16:test_cmplx_mag_f16
                Test long    arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
                Test long    arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
                Test long    arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
              }
           }
        }
        group Transform Tests {
           class = TransformTests
           folder = Transform