CMSIS-DSP: Added complex math f16

6 years ago · ebf9104c4e
parent 8b465544a1
commit ebf9104c4e
29 changed files with 8557 additions and 2 deletions
--- a/Include/arm_helium_utils.h
+++ b/Include/arm_helium_utils.h
@ -80,6 +80,8 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)

 /* newton initial guess */
 #define INVSQRT_MAGIC_F32           0x5f3759df
+#define INV_NEWTON_INIT_F32         0x7EF127EA
+

 #define INVSQRT_NEWTON_MVE_F32(invSqrt, xHalf, xStart)\
 {                                                     \
@ -95,6 +97,74 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
 }
 #endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) */

+
+/***************************************
+
+Definitions available for f16 datatype with HW acceleration only
+
+***************************************/
+#if defined (ARM_MATH_MVE_FLOAT16)
+/**
+  @brief         Sum the four complex f16 values packed in one vector.
+  @param[in]     vecIn  interleaved complex vector: re0, im0, re1, im1, re2, im2, re3, im3
+  @return        vector whose lanes 4 and 5 (zero-based) hold the real and imaginary
+                 parts of the sum; the other lanes hold partial sums and must be ignored
+ */
+__STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16(
+    float16x8_t   vecIn)
+{
+    float16x8_t   vecTmp, vecOut;
+    /*
+     * carry word that vshlcq shifts into the bottom of the vector; it is
+     * zeroed so that no indeterminate value is ever read
+     */
+    uint32_t    tmp = 0U;
+
+    /* swap the two complex values inside each 64-bit half of the vector */
+    vecTmp = (float16x8_t) vrev64q_s32((int32x4_t) vecIn);
+    // TO TRACK : using canonical addition leads to inefficient code generation for f16
+    // vecTmp = vecTmp + vecIn;
+    /*
+     * Compute
+     *  re0+re1 | im0+im1 | re0+re1 | im0+im1
+     *  re2+re3 | im2+im3 | re2+re3 | im2+im3
+     */
+    vecTmp = vaddq(vecTmp, vecIn);
+    vecOut = vecTmp;
+    /*
+     * shift the whole vector left by 32 bits, tmp is inserted in the bottom
+     */
+    vecOut = vreinterpretq_f16_s32(vshlcq_s32(vreinterpretq_s32_f16(vecOut)   , &tmp, 32));
+    /*
+     * Compute:
+     *    DONTCARE     |    DONTCARE     | re0+re1+re0+re1 |im0+im1+im0+im1
+     * re0+re1+re2+re3 | im0+im1+im2+im3 | re2+re3+re2+re3 |im2+im3+im2+im3
+     */
+    vecOut = vaddq(vecOut, vecTmp);
+    /*
+     * Cmplx sum is in lanes 4 & 5 (zero-based) of the vector
+     * return full vector
+     */
+    return vecOut;
+}
+
+
+/*
+ * Sum the complex f16 values packed in vec and extract the result:
+ * Re is assigned lane 4 and Im lane 5 of the vector returned by
+ * __mve_cmplx_sum_intra_vec_f16(). Re and Im must be assignable lvalues.
+ * Expands to a brace-enclosed block that declares a local named vecOut,
+ * so the vec argument must not itself be called vecOut.
+ */
+#define mve_cmplx_sum_intra_r_i_f16(vec, Re, Im)                \
+{                                                               \
+    float16x8_t   vecOut = __mve_cmplx_sum_intra_vec_f16(vec);    \
+    Re = vgetq_lane(vecOut, 4);                                 \
+    Im = vgetq_lane(vecOut, 5);                                 \
+}
+
+
+/*
+ * f16 seed constant for the inverse square root: arm_cmplx_mag_f16 subtracts
+ * the bit pattern of its input shifted right by one from this value to obtain
+ * the first estimate handed to INVSQRT_NEWTON_MVE_F16.
+ */
+#define INVSQRT_MAGIC_F16           0x59ba      /*  ( 0x1ba = 0x3759df >> 13) */
+#define INV_NEWTON_INIT_F16         0x7773
+
+/*
+ * One Newton-Raphson refinement step for 1/sqrt(x) on a float16x8_t vector:
+ *
+ *     invSqrt = xStart * (1.5 - xHalf * xStart * xStart)
+ *
+ * invSqrt  receives the refined estimate (may be the same variable as xStart)
+ * xHalf    vector holding x / 2
+ * xStart   current estimate of 1/sqrt(x)
+ *
+ * Expands to a brace-enclosed block that declares a local named tmp, so no
+ * argument may itself be called tmp.
+ */
+/* canonical version of INVSQRT_NEWTON_MVE_F16 leads to bad performance */
+#define INVSQRT_NEWTON_MVE_F16(invSqrt, xHalf, xStart)                  \
+{                                                                       \
+    float16x8_t tmp;                                                      \
+                                                                        \
+    /* tmp = xhalf * x * x */                                           \
+    tmp = vmulq(xStart, xStart);                                        \
+    tmp = vmulq(tmp, xHalf);                                            \
+    /* (1.5f - xhalf * x * x) */                                        \
+    tmp = vsubq(vdupq_n_f16((float16_t)1.5), tmp);                      \
+    /* x = x*(1.5f-xhalf*x*x); */                                       \
+    invSqrt = vmulq(tmp, xStart);                                       \
+}
+
+#endif
+
 /***************************************

 Definitions available for MVEI only
--- a/Include/dsp/basic_math_functions_f16.h
+++ b/Include/dsp/basic_math_functions_f16.h
@ -36,6 +36,7 @@ extern "C"

 #include "dsp/none.h"
 #include "dsp/utils.h"
+#include "dsp/fast_math_functions_f16.h"


 #if defined(ARM_FLOAT16_SUPPORTED)
--- a/Include/dsp/complex_math_functions_f16.h
+++ b/Include/dsp/complex_math_functions_f16.h
@ -26,12 +26,94 @@
 #ifndef _COMPLEX_MATH_FUNCTIONS_F16_H_
 #define _COMPLEX_MATH_FUNCTIONS_F16_H_

+#include "arm_math_types_f16.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+#include "dsp/fast_math_functions_f16.h"
+
 #ifdef   __cplusplus
 extern "C"
 {
 #endif

 #if defined(ARM_FLOAT16_SUPPORTED)
+
+ /**
+   * @brief  Floating-point complex conjugate.
+   * @param[in]  pSrc        points to the input vector
+   * @param[out] pDst        points to the output vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   *
+   * Data is interleaved (real, imag, real, imag, ...); each vector holds
+   * 2*numSamples values.
+   */
+  void arm_cmplx_conj_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples);
+
+ /**
+   * @brief  Floating-point complex magnitude squared
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   *
+   * The input is interleaved (real, imag, real, imag, ...) and holds
+   * 2*numSamples values; the output holds numSamples values.
+   */
+  void arm_cmplx_mag_squared_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples);
+
+  /**
+   * @brief  Floating-point complex magnitude
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   *
+   * The input is interleaved (real, imag, real, imag, ...) and holds
+   * 2*numSamples values; the output holds numSamples values.
+   */
+  void arm_cmplx_mag_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples);
+
+  /**
+   * @brief  Floating-point complex dot product
+   * @param[in]  pSrcA       points to the first input vector
+   * @param[in]  pSrcB       points to the second input vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   * @param[out] realResult  real part of the result returned here
+   * @param[out] imagResult  imaginary part of the result returned here
+   *
+   * Both inputs are interleaved (real, imag, real, imag, ...) and hold
+   * 2*numSamples values. The products are summed without conjugating
+   * either input.
+   */
+  void arm_cmplx_dot_prod_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t numSamples,
+        float16_t * realResult,
+        float16_t * imagResult);
+
+   /**
+   * @brief  Floating-point complex-by-real multiplication
+   * @param[in]  pSrcCmplx   points to the complex input vector
+   * @param[in]  pSrcReal    points to the real input vector
+   * @param[out] pCmplxDst   points to the complex output vector
+   * @param[in]  numSamples  number of samples in each vector
+   */
+  void arm_cmplx_mult_real_f16(
+  const float16_t * pSrcCmplx,
+  const float16_t * pSrcReal,
+        float16_t * pCmplxDst,
+        uint32_t numSamples);
+
+  /**
+   * @brief  Floating-point complex-by-complex multiplication
+   * @param[in]  pSrcA       points to the first input vector
+   * @param[in]  pSrcB       points to the second input vector
+   * @param[out] pDst        points to the output vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   *
+   * All three vectors are interleaved (real, imag, real, imag, ...) and hold
+   * 2*numSamples values.
+   */
+  void arm_cmplx_mult_cmplx_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t numSamples);
+
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
--- a/Include/dsp/fast_math_functions_f16.h
+++ b/Include/dsp/fast_math_functions_f16.h
@ -26,12 +26,46 @@
 #ifndef _FAST_MATH_FUNCTIONS_F16_H_
 #define _FAST_MATH_FUNCTIONS_F16_H_

+#include "arm_math_types_f16.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+#include "dsp/fast_math_functions.h"
+
 #ifdef   __cplusplus
 extern "C"
 {
 #endif

 #if defined(ARM_FLOAT16_SUPPORTED)
+
+ /**
+   * @addtogroup SQRT
+   * @{
+   */
+
+/**
+  @brief         Floating-point square root function.
+  @param[in]     in    input value
+  @param[out]    pOut  square root of input value
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : input value is positive
+                   - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
+
+  @par           Implementation
+                   The input is widened to float32_t, the root is computed by
+                   arm_sqrt_f32() and the result is narrowed back to float16_t.
+                   The status is the one reported by arm_sqrt_f32().
+ */
+__STATIC_FORCEINLINE arm_status arm_sqrt_f16(
+  float16_t in,
+  float16_t * pOut)
+  {
+    float32_t root32;                      /* root computed in single precision */
+    const arm_status status = arm_sqrt_f32((float32_t)in, &root32);
+
+    *pOut = (float16_t)root32;
+    return status;
+  }
+
+/**
+  @} end of SQRT group
+ */
+
+
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
--- a/Source/ComplexMathFunctions/CMakeLists.txt
+++ b/Source/ComplexMathFunctions/CMakeLists.txt
@ -5,8 +5,6 @@ project(CMSISDSPComplexMath)
 include(configLib)
 include(configDsp)

-file(GLOB SRC "./*_*.c")
-
 add_library(CMSISDSPComplexMath STATIC)

 configLib(CMSISDSPComplexMath ${ROOT})
@ -56,6 +54,14 @@ target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_f32.c)
 target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_q15.c)
 target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_q31.c)

+# Half-precision (f16) complex kernels: only added to the library when
+# neither ARMAC5 nor DISABLEFLOAT16 is set.
+if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
+target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_conj_f16.c)
+target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_dot_prod_f16.c)
+target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mag_f16.c)
+target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mag_squared_f16.c)
+target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_cmplx_f16.c)
+target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_f16.c)
+endif()

 ### Includes
 target_include_directories(CMSISDSPComplexMath PUBLIC "${DSP}/Include")
--- a/Source/ComplexMathFunctions/ComplexMathFunctionsF16.c
+++ b/Source/ComplexMathFunctions/ComplexMathFunctionsF16.c
@ -0,0 +1,32 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        ComplexMathFunctionsF16.c
+ * Description:  Combination of all complex math function f16 source files.
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_cmplx_conj_f16.c"
+#include "arm_cmplx_dot_prod_f16.c"
+#include "arm_cmplx_mag_f16.c"
+#include "arm_cmplx_mag_squared_f16.c"
+#include "arm_cmplx_mult_cmplx_f16.c"
+#include "arm_cmplx_mult_real_f16.c"
--- a/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_conj_f16.c
@ -0,0 +1,183 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_conj_f16.c
+ * Description:  Floating-point complex conjugate
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/complex_math_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @defgroup cmplx_conj Complex Conjugate
+
+  Conjugates the elements of a complex data vector.
+
+  The <code>pSrc</code> points to the source data and
+  <code>pDst</code> points to the destination data where the result should be written.
+  <code>numSamples</code> specifies the number of complex samples
+  and the data in each array is stored in an interleaved fashion
+  (real, imag, real, imag, ...).
+  Each array has a total of <code>2*numSamples</code> values.
+
+  The underlying algorithm is used:
+  <pre>
+  for (n = 0; n < numSamples; n++) {
+      pDst[(2*n)  ] =  pSrc[(2*n)  ];    // real part
+      pDst[(2*n)+1] = -pSrc[(2*n)+1];    // imag part
+  }
+  </pre>
+
+  There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup cmplx_conj
+  @{
+ */
+
+/**
+  @brief         Floating-point complex conjugate.
+  @param[in]     pSrc        points to the input vector
+  @param[out]    pDst        points to the output vector
+  @param[in]     numSamples  number of complex samples in each vector
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+/*
+ * Helium (MVE) implementation: multiplies the interleaved input by a
+ * (+1, -1, +1, -1, ...) vector so that every imaginary lane changes sign.
+ */
+void arm_cmplx_conj_f16(
+    const float16_t * pSrc,
+    float16_t * pDst,
+    uint32_t numSamples)
+{
+    /* per-lane multipliers: real lanes are kept, imaginary lanes are negated */
+    static const float16_t cmplx_conj_sign[8] = { 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f };
+    uint32_t blockSize = numSamples * CMPLX_DIM;   /* numSamples * CMPLX_DIM scalar values */
+    uint32_t blkCnt;
+    f16x8_t vecSrc;
+    f16x8_t vecSign;
+
+    /*
+     * load sign vector with the load intrinsic: the table is a const
+     * float16_t array, so it is not dereferenced through a f16x8_t pointer
+     * (that would drop the const qualifier and assume vector alignment)
+     */
+    vecSign = vld1q(cmplx_conj_sign);
+
+    /* Compute 4 complex samples (8 f16 values) at a time */
+    blkCnt = blockSize >> 3U;
+
+    while (blkCnt > 0U)
+    {
+        vecSrc = vld1q(pSrc);
+        vst1q(pDst,vmulq(vecSrc, vecSign));
+        /*
+         * Decrement the blkCnt loop counter
+         * Advance vector source and destination pointers
+         */
+        pSrc += 8;
+        pDst += 8;
+        blkCnt--;
+    }
+
+     /* Tail: complex samples that did not fill a whole vector */
+    blkCnt = (blockSize & 0x7) >> 1;
+
+    while (blkCnt > 0U)
+    {
+      /* C[0] + jC[1] = A[0]+ j(-1)A[1] */
+  
+      /* Calculate Complex Conjugate and store result in destination buffer. */
+      *pDst++ =  *pSrc++;
+      *pDst++ = -*pSrc++;
+  
+      /* Decrement loop counter */
+      blkCnt--;
+    }
+
+}
+
+#else
+/*
+ * Scalar implementation: copies every real part and negates every
+ * imaginary part of the interleaved input.
+ */
+void arm_cmplx_conj_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples)
+{
+  uint32_t remaining;                            /* complex samples left to convert */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+  /* Unrolled section: four complex samples (eight values) per pass */
+  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
+  {
+    /* C[0] + jC[1] = A[0]+ j(-1)A[1] */
+    pDst[0] =  pSrc[0];
+    pDst[1] = -pSrc[1];
+
+    pDst[2] =  pSrc[2];
+    pDst[3] = -pSrc[3];
+
+    pDst[4] =  pSrc[4];
+    pDst[5] = -pSrc[5];
+
+    pDst[6] =  pSrc[6];
+    pDst[7] = -pSrc[7];
+
+    pSrc += 8;
+    pDst += 8;
+  }
+
+  /* Samples that did not fill a group of four */
+  remaining = numSamples & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  remaining = numSamples;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  for (; remaining > 0U; remaining--)
+  {
+    /* C[0] + jC[1] = A[0]+ j(-1)A[1] */
+    *pDst++ =  *pSrc++;
+    *pDst++ = -*pSrc++;
+  }
+
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of cmplx_conj group
+ */
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
@ -0,0 +1,236 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_dot_prod_f16.c
+ * Description:  Floating-point complex dot product
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/complex_math_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @defgroup cmplx_dot_prod Complex Dot Product
+
+  Computes the dot product of two complex vectors.
+  The vectors are multiplied element-by-element and then summed.
+
+  The <code>pSrcA</code> points to the first complex input vector and
+  <code>pSrcB</code> points to the second complex input vector.
+  <code>numSamples</code> specifies the number of complex samples
+  and the data in each array is stored in an interleaved fashion
+  (real, imag, real, imag, ...).
+  Each array has a total of <code>2*numSamples</code> values.
+
+  The underlying algorithm is used:
+
+  <pre>
+  realResult = 0;
+  imagResult = 0;
+  for (n = 0; n < numSamples; n++) {
+      realResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
+      imagResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
+  }
+  </pre>
+
+  There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup cmplx_dot_prod
+  @{
+ */
+
+/**
+  @brief         Floating-point complex dot product.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     numSamples  number of complex samples in each vector
+  @param[out]    realResult  real part of the result returned here
+  @param[out]    imagResult  imaginary part of the result returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+/*
+ * Helium (MVE) implementation: accumulates the complex products in a vector
+ * of partial sums, then reduces that vector to one real and one imaginary
+ * value.
+ */
+void arm_cmplx_dot_prod_f16(
+    const float16_t * pSrcA,
+    const float16_t * pSrcB,
+    uint32_t numSamples,
+    float16_t * realResult,
+    float16_t * imagResult)
+{
+    uint32_t blockSize = numSamples * CMPLX_DIM;  /* numSamples * CMPLX_DIM scalar values */
+    uint32_t blkCnt;
+    float16_t real_sum, imag_sum;
+    f16x8_t vecSrcA, vecSrcB;
+    f16x8_t vec_acc = vdupq_n_f16(0.0f);
+
+    /* Compute 4 complex samples (8 f16 values) at a time */
+    blkCnt = blockSize >> 3U;
+
+    while (blkCnt > 0U)
+    {
+        vecSrcA = vld1q(pSrcA);
+        vecSrcB = vld1q(pSrcB);
+
+        /* the two rotations together accumulate the full complex product */
+        vec_acc = vcmlaq(vec_acc, vecSrcA, vecSrcB);
+        vec_acc = vcmlaq_rot90(vec_acc, vecSrcA, vecSrcB);
+
+        /*
+         * Decrement the blkCnt loop counter
+         * Advance vector source pointers
+         */
+        pSrcA += 8;
+        pSrcB += 8;
+        blkCnt--;
+    }
+   
+    /* Tail: values that did not fill a whole vector */
+    blkCnt = (blockSize & 7);
+
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+        /*
+         * zeroing predicated loads: only the blkCnt remaining values are
+         * read, so the inputs are never accessed past their end
+         */
+        vecSrcA = vld1q_z(pSrcA, p0);
+        vecSrcB = vld1q_z(pSrcB, p0);
+        vec_acc = vcmlaq_m(vec_acc, vecSrcA, vecSrcB, p0);
+        vec_acc = vcmlaq_rot90_m(vec_acc, vecSrcA, vecSrcB, p0);
+    }
+
+    /* Sum the partial parts */
+    mve_cmplx_sum_intra_r_i_f16(vec_acc, real_sum, imag_sum);
+
+    /*
+     * Store the real and imaginary results in the destination buffers
+     */
+    *realResult = real_sum;
+    *imagResult = imag_sum;
+}
+
+#else
+/*
+ * Scalar implementation: accumulates, in float16_t, the sum of the complex
+ * products A[n] * B[n] over all numSamples samples.
+ */
+void arm_cmplx_dot_prod_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t numSamples,
+        float16_t * realResult,
+        float16_t * imagResult)
+{
+  float16_t accRe = 0.0f;                        /* running real part */
+  float16_t accIm = 0.0f;                        /* running imaginary part */
+  float16_t aRe, aIm, bRe, bIm;                  /* current sample of A and of B */
+  uint32_t  pending;                             /* complex samples left to process */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+  /* Unrolled section: four complex products per pass */
+  for (pending = numSamples >> 2U; pending > 0U; pending--)
+  {
+    aRe = pSrcA[0];
+    aIm = pSrcA[1];
+    bRe = pSrcB[0];
+    bIm = pSrcB[1];
+
+    accRe += aRe * bRe;
+    accIm += aRe * bIm;
+    accRe -= aIm * bIm;
+    accIm += aIm * bRe;
+
+    aRe = pSrcA[2];
+    aIm = pSrcA[3];
+    bRe = pSrcB[2];
+    bIm = pSrcB[3];
+
+    accRe += aRe * bRe;
+    accIm += aRe * bIm;
+    accRe -= aIm * bIm;
+    accIm += aIm * bRe;
+
+    aRe = pSrcA[4];
+    aIm = pSrcA[5];
+    bRe = pSrcB[4];
+    bIm = pSrcB[5];
+
+    accRe += aRe * bRe;
+    accIm += aRe * bIm;
+    accRe -= aIm * bIm;
+    accIm += aIm * bRe;
+
+    aRe = pSrcA[6];
+    aIm = pSrcA[7];
+    bRe = pSrcB[6];
+    bIm = pSrcB[7];
+
+    accRe += aRe * bRe;
+    accIm += aRe * bIm;
+    accRe -= aIm * bIm;
+    accIm += aIm * bRe;
+
+    pSrcA += 8;
+    pSrcB += 8;
+  }
+
+  /* Samples that did not fill a group of four */
+  pending = numSamples & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  pending = numSamples;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  for (; pending > 0U; pending--)
+  {
+    aRe = *pSrcA++;
+    aIm = *pSrcA++;
+    bRe = *pSrcB++;
+    bIm = *pSrcB++;
+
+    accRe += aRe * bRe;
+    accIm += aRe * bIm;
+    accRe -= aIm * bIm;
+    accIm += aIm * bRe;
+  }
+
+  /* Hand both parts of the result back to the caller */
+  *realResult = accRe;
+  *imagResult = accIm;
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of cmplx_dot_prod group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_f16.c
@ -0,0 +1,239 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mag_f16.c
+ * Description:  Floating-point complex magnitude
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/complex_math_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @defgroup cmplx_mag Complex Magnitude
+
+  Computes the magnitude of the elements of a complex data vector.
+
+  The <code>pSrc</code> points to the source data and
+  <code>pDst</code> points to where the result should be written.
+  <code>numSamples</code> specifies the number of complex samples
+  in the input array and the data is stored in an interleaved fashion
+  (real, imag, real, imag, ...).
+  The input array has a total of <code>2*numSamples</code> values;
+  the output array has a total of <code>numSamples</code> values.
+
+  The underlying algorithm is used:
+
+  <pre>
+  for (n = 0; n < numSamples; n++) {
+      pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
+  }
+  </pre>
+
+  There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup cmplx_mag
+  @{
+ */
+
+/**
+  @brief         Floating-point complex magnitude.
+  @param[in]     pSrc        points to input vector
+  @param[out]    pDst        points to output vector
+  @param[in]     numSamples  number of complex samples in the input vector
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+
+/*
+ * Helium (MVE) implementation: computes re^2 + im^2 for 8 complex samples
+ * per vector, then takes the square root as x * (1/sqrt(x)), with the
+ * inverse square root refined by Newton-Raphson iterations.
+ */
+void arm_cmplx_mag_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples)
+{
+    int32_t blockSize = numSamples;  /* number of complex samples to process */
+    uint32_t  blkCnt;           /* loop counters */
+    f16x8x2_t vecSrc;
+    f16x8_t sum;
+
+    /* Compute 8 complex samples at a time (each vld2q consumes 16 f16 values) */
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U)
+    {
+        q15x8_t newtonStartVec;
+        f16x8_t sumHalf, invSqrt;
+
+        vecSrc = vld2q(pSrc);  
+        pSrc += 16;
+        /* sum = val[0]^2 + val[1]^2, i.e. the squared magnitude of each sample */
+        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
+        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
+
+        /*
+         * inlined Fast SQRT using inverse SQRT newton-raphson method
+         */
+
+        /* compute initial value: magic constant minus the bit pattern of sum shifted right by one */
+        newtonStartVec = vdupq_n_s16(INVSQRT_MAGIC_F16) - vshrq((q15x8_t) sum, 1);
+        sumHalf = sum * 0.5f;
+        /*
+         * compute 3 x iterations
+         *
+         * The more iterations, the more accuracy.
+         * If you need to trade a bit of accuracy for more performance,
+         * you can comment out the 3rd use of the macro.
+         */
+        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, (f16x8_t) newtonStartVec);
+        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
+        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
+        /*
+         * set negative values to 0
+         */
+        invSqrt = vdupq_m(invSqrt, (float16_t)0.0f, vcmpltq(invSqrt, (float16_t)0.0f));
+        /*
+         * sqrt(x) = x * invSqrt(x)
+         */
+        sum = vmulq(sum, invSqrt);
+        vstrhq_f16(pDst, sum); 
+        pDst += 8;
+        /*
+         * Decrement the blkCnt loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail: the last (numSamples & 7) complex samples; only that many
+     * results are stored thanks to the predicated store
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+        q15x8_t newtonStartVec;
+        f16x8_t sumHalf, invSqrt;
+
+        /*
+         * NOTE(review): this vld2q is not predicated, so it presumably reads a
+         * full 16 f16 values even when fewer samples remain - confirm that
+         * callers tolerate a read past the end of pSrc.
+         */
+        vecSrc = vld2q((float16_t const *)pSrc);
+        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
+        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
+
+        /*
+         * inlined Fast SQRT using inverse SQRT newton-raphson method
+         */
+
+        /* compute initial value */
+        newtonStartVec = vdupq_n_s16(INVSQRT_MAGIC_F16) - vshrq((q15x8_t) sum, 1);
+        sumHalf = vmulq(sum, (float16_t)0.5);
+        /*
+         * compute 2 x iterations
+         *
+         * NOTE(review): one iteration fewer than the main loop, so the tail
+         * results are presumably slightly less accurate - confirm this is
+         * intended.
+         */
+        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, (f16x8_t) newtonStartVec);
+        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
+        /*
+         * set negative values to 0
+         */
+        invSqrt = vdupq_m(invSqrt, (float16_t)0.0, vcmpltq(invSqrt, (float16_t)0.0));
+        /*
+         * sqrt(x) = x * invSqrt(x)
+         */
+        sum = vmulq(sum, invSqrt);
+        vstrhq_p_f16(pDst, sum, p0);
+    }
+}
+
+#else
+/*
+ * Scalar implementation: for every complex sample, passes re*re + im*im to
+ * arm_sqrt_f16() and lets it write the result into the output vector.
+ */
+void arm_cmplx_mag_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples)
+{
+  float16_t re, im;                              /* parts of the current sample */
+  uint32_t  pending;                             /* complex samples left to process */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+  /* Unrolled section: four magnitudes per pass */
+  for (pending = numSamples >> 2U; pending > 0U; pending--)
+  {
+    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+    re = pSrc[0];
+    im = pSrc[1];
+    arm_sqrt_f16((re * re) + (im * im), &pDst[0]);
+
+    re = pSrc[2];
+    im = pSrc[3];
+    arm_sqrt_f16((re * re) + (im * im), &pDst[1]);
+
+    re = pSrc[4];
+    im = pSrc[5];
+    arm_sqrt_f16((re * re) + (im * im), &pDst[2]);
+
+    re = pSrc[6];
+    im = pSrc[7];
+    arm_sqrt_f16((re * re) + (im * im), &pDst[3]);
+
+    pSrc += 8;
+    pDst += 4;
+  }
+
+  /* Samples that did not fill a group of four */
+  pending = numSamples & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  pending = numSamples;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  for (; pending > 0U; pending--)
+  {
+    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+    re = *pSrc++;
+    im = *pSrc++;
+
+    arm_sqrt_f16((re * re) + (im * im), pDst++);
+  }
+
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of cmplx_mag group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f16.c
@ -0,0 +1,172 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mag_squared_f16.c
+ * Description:  Floating-point complex magnitude squared
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/complex_math_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @defgroup cmplx_mag_squared Complex Magnitude Squared
+
+  Computes the magnitude squared of the elements of a complex data vector.
+
+  The <code>pSrc</code> points to the source data and
+  <code>pDst</code> points to where the result should be written.
+  <code>numSamples</code> specifies the number of complex samples
+  in the input array and the data is stored in an interleaved fashion
+  (real, imag, real, imag, ...).
+  The input array has a total of <code>2*numSamples</code> values;
+  the output array has a total of <code>numSamples</code> values.
+
+  The underlying algorithm is used:
+
+  <pre>
+  for (n = 0; n < numSamples; n++) {
+      pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
+  }
+  </pre>
+
+  There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup cmplx_mag_squared
+  @{
+ */
+
+/**
+  @brief         Floating-point complex magnitude squared.
+  @param[in]     pSrc        points to input vector
+  @param[out]    pDst        points to output vector
+  @param[in]     numSamples  number of complex samples in the input vector
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+/*
+ * Helium (MVE) implementation: one predicated loop computes re^2 + im^2 for
+ * up to 8 complex samples per pass; the predicate limits the lanes that are
+ * computed and stored on the final pass.
+ */
+void arm_cmplx_mag_squared_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples)
+{
+    int32_t   remaining;      /* complex samples left; drops to <= 0 when done */
+    f16x8x2_t reIm;           /* val[0] and val[1] are squared and summed below */
+    f16x8_t   magSq;
+
+    /* Each pass consumes 16 f16 values, i.e. 8 complex samples */
+    for (remaining = numSamples; remaining > 0; remaining -= 8)
+    {
+        /* predicate built from the number of samples still to process */
+        mve_pred16_t activeLanes = vctp16q(remaining);
+
+        /*
+         * NOTE(review): this vld2q is not predicated, so the final pass
+         * presumably reads a full 16 values even when fewer than 8 samples
+         * remain - confirm that callers tolerate it.
+         */
+        reIm  = vld2q(pSrc);
+        magSq = vmulq_m(vuninitializedq_f16(), reIm.val[0], reIm.val[0], activeLanes);
+        magSq = vfmaq_m(magSq, reIm.val[1], reIm.val[1], activeLanes);
+        vstrhq_p_f16(pDst, magSq, activeLanes);
+
+        pSrc += 16;
+        pDst += 8;
+    }
+
+}
+
+#else
+/*
+ * Scalar implementation: writes re*re + im*im for every complex sample of
+ * the interleaved input.
+ */
+void arm_cmplx_mag_squared_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t numSamples)
+{
+  float16_t re, im;                              /* parts of the current sample */
+  uint32_t  pending;                             /* complex samples left to process */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+  /* Unrolled section: four outputs per pass */
+  for (pending = numSamples >> 2U; pending > 0U; pending--)
+  {
+    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
+    re = pSrc[0];
+    im = pSrc[1];
+    pDst[0] = (re * re) + (im * im);
+
+    re = pSrc[2];
+    im = pSrc[3];
+    pDst[1] = (re * re) + (im * im);
+
+    re = pSrc[4];
+    im = pSrc[5];
+    pDst[2] = (re * re) + (im * im);
+
+    re = pSrc[6];
+    im = pSrc[7];
+    pDst[3] = (re * re) + (im * im);
+
+    pSrc += 8;
+    pDst += 4;
+  }
+
+  /* Samples that did not fill a group of four */
+  pending = numSamples & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  pending = numSamples;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  for (; pending > 0U; pending--)
+  {
+    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
+    re = *pSrc++;
+    im = *pSrc++;
+
+    *pDst++ = (re * re) + (im * im);
+  }
+
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of cmplx_mag_squared group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f16.c
@ -0,0 +1,217 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mult_cmplx_f16.c
+ * Description:  Floating-point complex-by-complex multiplication
+ *
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/complex_math_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
+
+  Multiplies a complex vector by another complex vector and generates a complex result.
+  The data in the complex arrays is stored in an interleaved fashion
+  (real, imag, real, imag, ...).
+  The parameter <code>numSamples</code> represents the number of complex
+  samples processed.  The complex arrays have a total of <code>2*numSamples</code>
+  real values.
+
+  The following algorithm is used:
+
+  <pre>
+  for (n = 0; n < numSamples; n++) {
+      pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
+      pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
+  }
+  </pre>
+
+  There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup CmplxByCmplxMult
+  @{
+ */
+
+/**
+  @brief         Floating-point complex-by-complex multiplication.
+  @param[in]     pSrcA       points to first input vector
+  @param[in]     pSrcB       points to second input vector
+  @param[out]    pDst        points to output vector
+  @param[in]     numSamples  number of samples in each vector
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+void arm_cmplx_mult_cmplx_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t numSamples)
+{
+    /*
+     * Helium (MVE) implementation of the interleaved complex product
+     *   pDst[2n]   = pSrcA[2n] * pSrcB[2n]   - pSrcA[2n+1] * pSrcB[2n+1]
+     *   pDst[2n+1] = pSrcA[2n] * pSrcB[2n+1] + pSrcA[2n+1] * pSrcB[2n]
+     * for n in [0, numSamples).
+     */
+    uint32_t blockSizeC = numSamples * CMPLX_DIM;  /* number of scalar f16 values */
+    uint32_t blkCnt;                               /* loop counter */
+    f16x8_t vecA;
+    f16x8_t vecB;
+    f16x8_t vecDst;
+    float16_t a, b, c, d;  /* Temporary variables to store real and imaginary values */
+
+    /* Vector part: 8 scalar values per iteration */
+    blkCnt = blockSizeC >> 3;
+
+    while (blkCnt > 0U)
+    {
+        vecA = vldrhq_f16(pSrcA);
+        vecB = vldrhq_f16(pSrcB);
+        /*
+         * The rotation-0 multiply followed by the rotation-90
+         * multiply-accumulate together build the complete product:
+         *   C[2 * i]     = A[2 * i] * B[2 * i]     - A[2 * i + 1] * B[2 * i + 1]
+         *   C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]
+         */
+        vecDst = vcmulq(vecA, vecB);
+        vecDst = vcmlaq_rot90(vecDst, vecA, vecB);
+        vstrhq_f16(pDst, vecDst);
+
+        blkCnt--;
+        pSrcA += 8;
+        pSrcB += 8;
+        pDst += 8;
+    }
+
+    /*
+     * Tail: scalar values not covered by the vector loop, two per complex
+     * sample. The count has to be derived from the scalar element count
+     * (blockSizeC), not from numSamples, otherwise samples are dropped or
+     * the buffers are overrun when numSamples is not a multiple of 8.
+     */
+    blkCnt = (blockSizeC & 7U) >> 1;
+    while (blkCnt > 0U)
+    {
+      /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
+      /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
+
+      a = *pSrcA++;
+      b = *pSrcA++;
+      c = *pSrcB++;
+      d = *pSrcB++;
+
+      /* store result in destination buffer. */
+      *pDst++ = (a * c) - (b * d);
+      *pDst++ = (a * d) + (b * c);
+
+      /* Decrement loop counter */
+      blkCnt--;
+    }
+}
+
+
+#else
+void arm_cmplx_mult_cmplx_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t numSamples)
+{
+  uint32_t remaining;             /* Complex samples still to be produced */
+  float16_t aRe, aIm, bRe, bIm;   /* Real/imaginary parts of the current operands */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+  /* Unrolled section: four complex products per pass */
+  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
+  {
+    /* dst.re = a.re * b.re - a.im * b.im */
+    /* dst.im = a.re * b.im + a.im * b.re */
+
+    aRe = pSrcA[0];
+    aIm = pSrcA[1];
+    bRe = pSrcB[0];
+    bIm = pSrcB[1];
+    pDst[0] = (aRe * bRe) - (aIm * bIm);
+    pDst[1] = (aRe * bIm) + (aIm * bRe);
+
+    aRe = pSrcA[2];
+    aIm = pSrcA[3];
+    bRe = pSrcB[2];
+    bIm = pSrcB[3];
+    pDst[2] = (aRe * bRe) - (aIm * bIm);
+    pDst[3] = (aRe * bIm) + (aIm * bRe);
+
+    aRe = pSrcA[4];
+    aIm = pSrcA[5];
+    bRe = pSrcB[4];
+    bIm = pSrcB[5];
+    pDst[4] = (aRe * bRe) - (aIm * bIm);
+    pDst[5] = (aRe * bIm) + (aIm * bRe);
+
+    aRe = pSrcA[6];
+    aIm = pSrcA[7];
+    bRe = pSrcB[6];
+    bIm = pSrcB[7];
+    pDst[6] = (aRe * bRe) - (aIm * bIm);
+    pDst[7] = (aRe * bIm) + (aIm * bRe);
+
+    pSrcA += 8;
+    pSrcB += 8;
+    pDst += 8;
+  }
+
+  /* Samples left over once the groups of four are done */
+  remaining = numSamples & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  remaining = numSamples;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  for (; remaining > 0U; remaining--)
+  {
+    /* dst.re = a.re * b.re - a.im * b.im */
+    /* dst.im = a.re * b.im + a.im * b.re */
+
+    aRe = *pSrcA++;
+    aIm = *pSrcA++;
+    bRe = *pSrcB++;
+    bIm = *pSrcB++;
+
+    *pDst++ = (aRe * bRe) - (aIm * bIm);
+    *pDst++ = (aRe * bIm) + (aIm * bRe);
+  }
+
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of CmplxByCmplxMult group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_f16.c
@ -0,0 +1,192 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mult_real_f16.c
+ * Description:  Floating-point complex by real multiplication
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/complex_math_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @defgroup CmplxByRealMult Complex-by-Real Multiplication
+
+  Multiplies a complex vector by a real vector and generates a complex result.
+  The data in the complex arrays is stored in an interleaved fashion
+  (real, imag, real, imag, ...).
+  The parameter <code>numSamples</code> represents the number of complex
+  samples processed.  The complex arrays have a total of <code>2*numSamples</code>
+  real values while the real array has a total of <code>numSamples</code>
+  real values.
+
+  The following algorithm is used:
+
+  <pre>
+  for (n = 0; n < numSamples; n++) {
+      pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
+      pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
+  }
+  </pre>
+
+  There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup CmplxByRealMult
+  @{
+ */
+
+/**
+  @brief         Floating-point complex-by-real multiplication.
+  @param[in]     pSrcCmplx   points to complex input vector
+  @param[in]     pSrcReal    points to real input vector
+  @param[out]    pCmplxDst   points to complex output vector
+  @param[in]     numSamples  number of samples in each vector
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+void arm_cmplx_mult_real_f16(
+  const float16_t * pSrcCmplx,
+  const float16_t * pSrcReal,
+        float16_t * pCmplxDst,
+        uint32_t numSamples)
+{
+    /*
+     * Helium (MVE) implementation of
+     *   pCmplxDst[2n]   = pSrcCmplx[2n]   * pSrcReal[n]
+     *   pCmplxDst[2n+1] = pSrcCmplx[2n+1] * pSrcReal[n]
+     * for n in [0, numSamples).
+     */
+
+    /* Element offsets that load each real value into two adjacent lanes */
+    static const uint16_t stride_cmplx_x_real_16[8] = {
+        0, 0, 1, 1, 2, 2, 3, 3
+        };
+    uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* number of scalar f16 values */
+    uint32_t blkCnt;                                /* loop counter */
+    f16x8_t rVec;
+    f16x8_t cmplxVec;
+    f16x8_t dstVec;
+    uint16x8_t strideVec;
+
+
+    /* stride vector for pairs of real generation */
+    strideVec = vld1q(stride_cmplx_x_real_16);
+
+    /* Compute 4 complex outputs at a time */
+    blkCnt = blockSizeC >> 3;
+    while (blkCnt > 0U) 
+    {
+        cmplxVec = vld1q(pSrcCmplx);
+        rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec);
+        dstVec = vmulq(cmplxVec, rVec);
+        vst1q(pCmplxDst, dstVec);
+
+        pSrcReal += 4;
+        pSrcCmplx += 8;
+        pCmplxDst += 8;
+        blkCnt--;
+    }
+
+    /* Tail: fewer than 8 scalar values left */
+    blkCnt = blockSizeC & 7;
+    if (blkCnt > 0U) {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+
+        /*
+         * Both loads use the same predicate as the store so that only the
+         * remaining elements are read; full-width loads here would read
+         * past the end of the input buffers.
+         */
+        cmplxVec = vldrhq_z_f16(pSrcCmplx, p0);
+        rVec = vldrhq_gather_shifted_offset_z_f16(pSrcReal, strideVec, p0);
+        dstVec = vmulq(cmplxVec, rVec);
+        vstrhq_p_f16(pCmplxDst, dstVec, p0);
+    }
+}
+
+#else
+void arm_cmplx_mult_real_f16(
+  const float16_t * pSrcCmplx,
+  const float16_t * pSrcReal,
+        float16_t * pCmplxDst,
+        uint32_t numSamples)
+{
+  uint32_t remaining;   /* Complex samples still to be produced */
+  float16_t scale;      /* Real factor applied to the current complex sample */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+  /* Unrolled section: four complex outputs per pass */
+  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
+  {
+    /* dst.re = src.re * real, dst.im = src.im * real */
+
+    scale = pSrcReal[0];
+    pCmplxDst[0] = pSrcCmplx[0] * scale;
+    pCmplxDst[1] = pSrcCmplx[1] * scale;
+
+    scale = pSrcReal[1];
+    pCmplxDst[2] = pSrcCmplx[2] * scale;
+    pCmplxDst[3] = pSrcCmplx[3] * scale;
+
+    scale = pSrcReal[2];
+    pCmplxDst[4] = pSrcCmplx[4] * scale;
+    pCmplxDst[5] = pSrcCmplx[5] * scale;
+
+    scale = pSrcReal[3];
+    pCmplxDst[6] = pSrcCmplx[6] * scale;
+    pCmplxDst[7] = pSrcCmplx[7] * scale;
+
+    pSrcReal += 4;
+    pSrcCmplx += 8;
+    pCmplxDst += 8;
+  }
+
+  /* Samples left over once the groups of four are done */
+  remaining = numSamples & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  remaining = numSamples;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  for (; remaining > 0U; remaining--)
+  {
+    /* dst.re = src.re * real, dst.im = src.im * real */
+
+    scale = *pSrcReal++;
+    *pCmplxDst++ = *pSrcCmplx++ * scale;
+    *pCmplxDst++ = *pSrcCmplx++ * scale;
+  }
+
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of CmplxByRealMult group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@ -327,6 +327,7 @@ set(TESTSRC
 if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL)))
 set(TESTSRC16 
  Source/Tests/BasicTestsF16.cpp
+  Source/Tests/ComplexTestsF16.cpp
  Source/Tests/TransformCF16.cpp
  Source/Tests/TransformRF16.cpp
  )
--- a/Testing/Include/Tests/ComplexTestsF16.h
+++ b/Testing/Include/Tests/ComplexTestsF16.h
@ -0,0 +1,21 @@
+#include "Test.h"
+#include "Pattern.h"
+
+#include "dsp/complex_math_functions_f16.h"
+
+
+// Test suite class for the float16 (f16) complex math functions.
+// NOTE(review): setUp/tearDown are presumably invoked by the test framework
+// around the test selected by the testID_t argument - confirm against Client::Suite.
+class ComplexTestsF16:public Client::Suite
+    {
+        public:
+            ComplexTestsF16(Testing::testID_t id);
+            virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr);
+            virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
+        private:
+            // Further private declarations are textually included into the class body.
+            // NOTE(review): presumably the test_* member function declarations - confirm.
+            #include "ComplexTestsF16_decl.h"
+            
+            // Input patterns read by the tests
+            Client::Pattern<float16_t> input1;
+            Client::Pattern<float16_t> input2;
+            // Buffer receiving the output of the function under test
+            Client::LocalPattern<float16_t> output;
+            // Reference patterns are not loaded when we are in dump mode
+            Client::RefPattern<float16_t> ref;
+    };
--- a/Testing/PatternGeneration/ComplexMaths.py
+++ b/Testing/PatternGeneration/ComplexMaths.py
@ -105,11 +105,13 @@ def  generatePatterns():
     PARAMDIR = os.path.join("Parameters","DSP","ComplexMaths","ComplexMaths")
     
     configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32")
+     configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16")
     configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31")
     configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15")
     
     
     writeTests(configf32,0)
+     writeTests(configf16,16)
     writeTests(configq31,31)
     writeTests(configq15,15)

--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input1_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input1_f16.txt
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input2_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input2_f16.txt
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input3_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Input3_f16.txt
@ -0,0 +1,514 @@
+H
+256
+// 0.606399
+0x38da
+// 0.073125
+0x2cae
+// -0.140834
+0xb082
+// -0.900755
+0xbb35
+// 0.148560
+0x30c1
+// -0.261878
+0xb431
+// -0.545170
+0xb85d
+// -0.628326
+0xb907
+// 0.495511
+0x37ee
+// 0.063174
+0x2c0b
+// -0.169679
+0xb16e
+// 0.145953
+0x30ac
+// 0.496673
+0x37f2
+// 1.000000
+0x3c00
+// 0.131807
+0x3038
+// 0.036478
+0x28ab
+// 0.427452
+0x36d7
+// -0.526852
+0xb837
+// -0.153413
+0xb0e9
+// -0.180253
+0xb1c5
+// 0.192358
+0x3228
+// 0.534224
+0x3846
+// -0.143569
+0xb098
+// -0.378102
+0xb60d
+// -0.387182
+0xb632
+// -0.181926
+0xb1d2
+// 0.627082
+0x3904
+// -0.782546
+0xba43
+// 0.227872
+0x334b
+// -0.920057
+0xbb5c
+// 0.028790
+0x275f
+// -0.002111
+0x9853
+// -0.448033
+0xb72b
+// 0.259638
+0x3427
+// -0.284893
+0xb48f
+// -0.172468
+0xb185
+// 0.304282
+0x34de
+// 0.130491
+0x302d
+// -0.206776
+0xb29e
+// 0.384068
+0x3625
+// 0.208473
+0x32ac
+// -0.097562
+0xae3e
+// 0.121821
+0x2fcc
+// -0.611990
+0xb8e5
+// -0.062335
+0xabfb
+// 0.614710
+0x38eb
+// 0.438926
+0x3706
+// 0.195912
+0x3245
+// 0.081082
+0x2d30
+// 0.012720
+0x2283
+// 0.258657
+0x3423
+// -0.268801
+0xb44d
+// -0.183050
+0xb1dc
+// 0.087615
+0x2d9b
+// -0.427434
+0xb6d7
+// 0.301905
+0x34d5
+// -0.236141
+0xb38e
+// 0.188230
+0x3206
+// -0.089373
+0xadb8
+// -0.294443
+0xb4b6
+// 0.506253
+0x380d
+// 0.841140
+0x3abb
+// 0.104046
+0x2ea9
+// 0.285712
+0x3492
+// -0.725796
+0xb9ce
+// 0.649891
+0x3933
+// 0.663543
+0x394f
+// -0.062934
+0xac07
+// -0.387710
+0xb634
+// 0.531890
+0x3841
+// 0.277675
+0x3471
+// 0.319026
+0x351b
+// 0.072518
+0x2ca4
+// 0.637432
+0x3919
+// -0.316837
+0xb512
+// 0.031885
+0x2815
+// -0.109017
+0xaefa
+// -0.531561
+0xb841
+// -0.116513
+0xaf75
+// 0.005209
+0x1d56
+// -0.178215
+0xb1b4
+// 0.096452
+0x2e2c
+// -0.089155
+0xadb5
+// -0.193966
+0xb235
+// -0.318435
+0xb518
+// -0.137340
+0xb065
+// 0.325371
+0x3535
+// -0.413025
+0xb69c
+// -0.007530
+0x9fb6
+// -0.011499
+0xa1e3
+// -0.359144
+0xb5bf
+// -0.317911
+0xb516
+// 0.018568
+0x24c1
+// -0.366742
+0xb5de
+// 0.221775
+0x3319
+// 0.420017
+0x36b8
+// -0.040939
+0xa93d
+// -0.362740
+0xb5ce
+// 0.471219
+0x378a
+// -0.099621
+0xae60
+// 0.326052
+0x3538
+// 0.311047
+0x34fa
+// 0.156421
+0x3101
+// -0.068640
+0xac65
+// -0.095922
+0xae24
+// -0.013959
+0xa326
+// 0.607825
+0x38dd
+// 0.093427
+0x2dfb
+// 0.329927
+0x3547
+// 0.084851
+0x2d6e
+// 0.726006
+0x39cf
+// 0.248801
+0x33f6
+// 0.620044
+0x38f6
+// 0.164699
+0x3145
+// 0.045769
+0x29dc
+// 0.264445
+0x343b
+// -0.238087
+0xb39e
+// 0.220883
+0x3311
+// 0.018551
+0x24c0
+// -0.057134
+0xab50
+// -0.155357
+0xb0f9
+// 0.038525
+0x28ee
+// 0.208492
+0x32ac
+// -0.098819
+0xae53
+// 0.027140
+0x26f3
+// -0.333135
+0xb555
+// -0.423594
+0xb6c7
+// 0.521231
+0x382b
+// 0.406043
+0x367f
+// -0.457325
+0xb751
+// 0.089960
+0x2dc2
+// -0.107212
+0xaedd
+// 0.089652
+0x2dbd
+// -0.269460
+0xb450
+// 0.155036
+0x30f6
+// 0.024048
+0x2628
+// 0.220735
+0x3310
+// 0.032031
+0x281a
+// -0.567049
+0xb889
+// 0.145897
+0x30ab
+// -0.094783
+0xae11
+// 0.319032
+0x351b
+// -0.091891
+0xade2
+// 0.416962
+0x36ac
+// 0.093970
+0x2e04
+// 0.564895
+0x3885
+// -0.296964
+0xb4c0
+// -0.209322
+0xb2b3
+// 0.265009
+0x343d
+// 0.093215
+0x2df7
+// 0.622832
+0x38fc
+// -0.085788
+0xad7e
+// 0.670554
+0x395d
+// 0.032468
+0x2828
+// 0.118023
+0x2f8e
+// -0.269207
+0xb44f
+// 0.217617
+0x32f7
+// 0.213691
+0x32d7
+// 0.439040
+0x3706
+// 0.241885
+0x33be
+// -0.424515
+0xb6cb
+// 0.352380
+0x35a3
+// 0.588583
+0x38b5
+// -0.264797
+0xb43d
+// 0.329184
+0x3544
+// 0.034001
+0x285a
+// -0.423064
+0xb6c5
+// -0.608316
+0xb8de
+// -0.338928
+0xb56c
+// 0.419995
+0x36b8
+// 0.200555
+0x326b
+// 0.329638
+0x3546
+// -0.294240
+0xb4b5
+// -0.897858
+0xbb2f
+// 0.160219
+0x3121
+// 0.131756
+0x3037
+// 0.206411
+0x329b
+// 0.109237
+0x2efe
+// -0.367268
+0xb5e0
+// 0.292430
+0x34ae
+// -0.414400
+0xb6a1
+// -0.642448
+0xb924
+// 0.238399
+0x33a1
+// 0.090387
+0x2dc9
+// -0.512754
+0xb81a
+// 0.301373
+0x34d2
+// -0.466867
+0xb778
+// 0.204287
+0x328a
+// -0.229499
+0xb358
+// -0.119896
+0xafac
+// 0.440248
+0x370b
+// 0.649995
+0x3933
+// 0.129477
+0x3025
+// 0.241037
+0x33b7
+// -0.411964
+0xb697
+// 0.228133
+0x334d
+// 0.942283
+0x3b8a
+// -0.390976
+0xb641
+// 0.182779
+0x31d9
+// 0.228995
+0x3354
+// 0.126382
+0x300b
+// 0.225140
+0x3334
+// -0.214251
+0xb2db
+// 0.439711
+0x3709
+// -0.638072
+0xb91b
+// -0.667301
+0xb957
+// -0.353387
+0xb5a7
+// 0.329438
+0x3545
+// -0.543036
+0xb858
+// -0.195706
+0xb243
+// -0.000314
+0x8d26
+// -0.346311
+0xb58a
+// -0.040030
+0xa920
+// 0.309919
+0x34f5
+// 0.214685
+0x32df
+// -0.256227
+0xb41a
+// 0.256241
+0x341a
+// 0.423187
+0x36c5
+// -0.070894
+0xac8a
+// -0.408192
+0xb688
+// 0.258732
+0x3424
+// 0.743039
+0x39f2
+// -0.328534
+0xb542
+// -0.502412
+0xb805
+// -0.550943
+0xb868
+// 0.461636
+0x3763
+// -0.098335
+0xae4b
+// -0.331961
+0xb550
+// 0.502005
+0x3804
+// -0.060550
+0xabc0
+// -0.218616
+0xb2ff
+// 0.206607
+0x329d
+// 0.509390
+0x3813
+// 0.331278
+0x354d
+// -0.143708
+0xb099
+// 0.008236
+0x2038
+// -0.256486
+0xb41b
+// -0.154828
+0xb0f4
+// -0.606731
+0xb8db
+// 0.043363
+0x298d
+// 0.416313
+0x36a9
+// 0.132691
+0x303f
+// 0.716789
+0x39bc
+// 0.827380
+0x3a9e
+// 0.109746
+0x2f06
+// 0.480993
+0x37b2
+// -0.424777
+0xb6cc
+// -0.169704
+0xb16e
+// -0.095902
+0xae23
+// 0.022081
+0x25a7
+// -0.227175
+0xb345
+// 0.382023
+0x361d
+// 0.316215
+0x350f
+// -0.027787
+0xa71d
+// 0.107868
+0x2ee7
+// -0.091834
+0xade1
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference1_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference1_f16.txt
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference2_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference2_f16.txt
@ -0,0 +1,6 @@
+H
+2
+// -0.584459
+0xb8ad
+// 0.027514
+0x270b
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference3_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference3_f16.txt
@ -0,0 +1,6 @@
+H
+2
+// -1.063155
+0xbc41
+// 0.204536
+0x328c
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference4_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference4_f16.txt
@ -0,0 +1,6 @@
+H
+2
+// -2.020148
+0xc00a
+// -0.083691
+0xad5b
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference5_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference5_f16.txt
@ -0,0 +1,514 @@
+H
+256
+// 0.211657
+0x32c6
+// 0.164180
+0x3141
+// 0.394092
+0x364e
+// 0.837031
+0x3ab2
+// 0.203042
+0x327f
+// 0.594290
+0x38c1
+// 0.425438
+0x36cf
+// 0.692208
+0x398a
+// 0.478904
+0x37aa
+// 0.731179
+0x39d9
+// 0.228972
+0x3354
+// 0.423292
+0x36c6
+// 0.687425
+0x3980
+// 0.637676
+0x391a
+// 0.400567
+0x3669
+// 0.284900
+0x348f
+// 0.800826
+0x3a68
+// 0.530918
+0x383f
+// 0.899030
+0x3b31
+// 0.067538
+0x2c53
+// 0.418694
+0x36b3
+// 0.422016
+0x36c1
+// 0.600117
+0x38cd
+// 0.096654
+0x2e30
+// 0.382849
+0x3620
+// 0.553403
+0x386d
+// 0.109789
+0x2f07
+// 0.740600
+0x39ed
+// 0.830944
+0x3aa6
+// 0.426594
+0x36d3
+// 0.281352
+0x3480
+// 0.270642
+0x3455
+// 0.354207
+0x35ab
+// 1.046783
+0x3c30
+// 0.937881
+0x3b81
+// 0.476101
+0x379e
+// 0.369946
+0x35eb
+// 0.334837
+0x355b
+// 0.319491
+0x351d
+// 0.119662
+0x2fa9
+// 0.664848
+0x3952
+// 0.641369
+0x3922
+// 0.414986
+0x36a4
+// 0.824391
+0x3a98
+// 0.404576
+0x3679
+// 0.283363
+0x3489
+// 0.467747
+0x377c
+// 0.467828
+0x377c
+// 0.140330
+0x307e
+// 0.402280
+0x3670
+// 1.003391
+0x3c03
+// 0.144475
+0x30a0
+// 0.461048
+0x3760
+// 0.468613
+0x377f
+// 0.609266
+0x38e0
+// 0.560596
+0x387c
+// 0.410914
+0x3693
+// 0.196930
+0x324d
+// 0.256623
+0x341b
+// 0.435163
+0x36f6
+// 0.324492
+0x3531
+// 0.371618
+0x35f2
+// 0.218667
+0x32ff
+// 0.324592
+0x3532
+// 0.095539
+0x2e1d
+// 0.268452
+0x344c
+// 0.399025
+0x3662
+// 0.410560
+0x3692
+// 0.535278
+0x3848
+// 0.300637
+0x34cf
+// 0.413966
+0x36a0
+// 0.263911
+0x3439
+// 0.227363
+0x3347
+// 0.245900
+0x33de
+// 0.444928
+0x371e
+// 0.486777
+0x37ca
+// 0.388128
+0x3636
+// 0.303929
+0x34dd
+// 0.320661
+0x3521
+// 0.225175
+0x3335
+// 0.817364
+0x3a8a
+// 0.479373
+0x37ac
+// 0.857010
+0x3adb
+// 0.356348
+0x35b4
+// 0.134285
+0x304c
+// 0.324278
+0x3530
+// 0.568790
+0x388d
+// 0.302070
+0x34d5
+// 0.224479
+0x332f
+// 0.394179
+0x364f
+// 0.577446
+0x389f
+// 0.250274
+0x3401
+// 0.531677
+0x3841
+// 0.424735
+0x36cc
+// 0.653466
+0x393a
+// 0.316197
+0x350f
+// 0.227789
+0x334a
+// 0.184250
+0x31e5
+// 0.090642
+0x2dcd
+// 0.462727
+0x3767
+// 0.344118
+0x3582
+// 0.245310
+0x33da
+// 0.100303
+0x2e6b
+// 0.447675
+0x372a
+// 0.691568
+0x3988
+// 0.363492
+0x35d1
+// 0.459596
+0x375b
+// 0.559206
+0x3879
+// 0.119232
+0x2fa1
+// 0.691377
+0x3988
+// 0.423896
+0x36c8
+// 0.092387
+0x2dea
+// 0.016503
+0x243a
+// 0.389855
+0x363d
+// 0.310975
+0x34fa
+// 0.508563
+0x3812
+// 0.446708
+0x3726
+// 0.292022
+0x34ac
+// 0.280445
+0x347d
+// 0.371688
+0x35f2
+// 0.231185
+0x3366
+// 0.468586
+0x377f
+// 0.037421
+0x28ca
+// 0.483864
+0x37be
+// 0.434319
+0x36f3
+// 0.164206
+0x3141
+// 0.584744
+0x38ae
+// 0.264999
+0x343d
+// 0.371643
+0x35f2
+// 0.662050
+0x394c
+// 0.566702
+0x3889
+// 0.496748
+0x37f3
+// 0.529369
+0x383c
+// 0.397883
+0x365e
+// 0.503458
+0x3807
+// 0.154150
+0x30ef
+// 0.435808
+0x36f9
+// 0.939636
+0x3b84
+// 0.347287
+0x358e
+// 0.202447
+0x327a
+// 0.834662
+0x3aad
+// 0.522830
+0x382f
+// 0.539743
+0x3851
+// 0.390797
+0x3641
+// 0.269937
+0x3452
+// 0.528511
+0x383a
+// 0.142120
+0x308c
+// 0.507329
+0x380f
+// 0.206810
+0x329e
+// 0.793935
+0x3a5a
+// 0.835657
+0x3aaf
+// 0.670280
+0x395d
+// 0.113984
+0x2f4c
+// 0.472458
+0x378f
+// 0.575082
+0x389a
+// 0.732740
+0x39dd
+// 0.591795
+0x38bc
+// 0.546775
+0x3860
+// 0.296527
+0x34bf
+// 0.261418
+0x342f
+// 0.571263
+0x3892
+// 0.589192
+0x38b7
+// 0.822382
+0x3a94
+// 0.361338
+0x35c8
+// 0.153735
+0x30eb
+// 0.466681
+0x3778
+// 0.393489
+0x364c
+// 0.721908
+0x39c6
+// 0.086062
+0x2d82
+// 0.331373
+0x354d
+// 0.621595
+0x38f9
+// 0.516218
+0x3821
+// 0.771446
+0x3a2c
+// 0.420211
+0x36b9
+// 0.422246
+0x36c2
+// 0.338301
+0x356a
+// 0.745863
+0x39f8
+// 0.650429
+0x3934
+// 0.550667
+0x3868
+// 0.412579
+0x369a
+// 0.288576
+0x349e
+// 0.353537
+0x35a8
+// 0.225252
+0x3335
+// 0.194284
+0x3238
+// 0.547930
+0x3862
+// 0.300970
+0x34d1
+// 0.504146
+0x3808
+// 1.026896
+0x3c1c
+// 0.418343
+0x36b2
+// 0.627205
+0x3905
+// 0.230780
+0x3363
+// 0.145178
+0x30a5
+// 0.384564
+0x3627
+// 0.278455
+0x3475
+// 0.341747
+0x3578
+// 0.448953
+0x372f
+// 0.233506
+0x3379
+// 0.229044
+0x3354
+// 0.261151
+0x342e
+// 0.582315
+0x38a9
+// 0.214435
+0x32dd
+// 0.195282
+0x3240
+// 0.422011
+0x36c1
+// 1.204601
+0x3cd2
+// 0.681809
+0x3974
+// 0.108645
+0x2ef4
+// 0.054477
+0x2af9
+// 0.269794
+0x3451
+// 0.138462
+0x306e
+// 0.381523
+0x361b
+// 0.225135
+0x3334
+// 0.204132
+0x3288
+// 0.285641
+0x3492
+// 0.521935
+0x382d
+// 0.670786
+0x395e
+// 0.601863
+0x38d1
+// 0.117244
+0x2f81
+// 0.804322
+0x3a6f
+// 0.522114
+0x382d
+// 0.456285
+0x374d
+// 0.325889
+0x3537
+// 0.052211
+0x2aaf
+// 0.471914
+0x378d
+// 0.620330
+0x38f6
+// 0.183729
+0x31e1
+// 0.612658
+0x38e7
+// 0.364133
+0x35d3
+// 0.520880
+0x382b
+// 0.693829
+0x398d
+// 0.146768
+0x30b2
+// 0.350727
+0x359d
+// 0.572416
+0x3894
+// 0.145950
+0x30ac
+// 0.659216
+0x3946
+// 0.232441
+0x3370
+// 0.365908
+0x35db
+// 0.533640
+0x3845
+// 0.578456
+0x38a1
+// 0.166925
+0x3157
+// 0.439703
+0x3709
+// 0.427486
+0x36d7
+// 0.461784
+0x3763
+// 0.292177
+0x34ad
+// 0.461312
+0x3762
+// 0.543533
+0x3859
+// 0.435610
+0x36f8
+// 0.237356
+0x3398
+// 0.366056
+0x35db
+// 0.196488
+0x324a
+// 0.022935
+0x25df
+// 0.359284
+0x35c0
+// 0.409479
+0x368d
+// 0.228371
+0x334f
+// 0.124286
+0x2ff4
+// 0.434761
+0x36f5
+// 0.629995
+0x390a
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference6_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference6_f16.txt
@ -0,0 +1,514 @@
+H
+256
+// 0.044799
+0x29bc
+// 0.026955
+0x26e7
+// 0.155308
+0x30f8
+// 0.700622
+0x399b
+// 0.041226
+0x2947
+// 0.353181
+0x35a7
+// 0.180997
+0x31cb
+// 0.479153
+0x37ab
+// 0.229349
+0x3357
+// 0.534623
+0x3847
+// 0.052428
+0x2ab6
+// 0.179176
+0x31bc
+// 0.472553
+0x3790
+// 0.406630
+0x3682
+// 0.160454
+0x3122
+// 0.081168
+0x2d32
+// 0.641322
+0x3921
+// 0.281874
+0x3483
+// 0.808254
+0x3a77
+// 0.004561
+0x1cac
+// 0.175304
+0x319c
+// 0.178098
+0x31b3
+// 0.360140
+0x35c3
+// 0.009342
+0x20c8
+// 0.146573
+0x30b1
+// 0.306255
+0x34e6
+// 0.012054
+0x222c
+// 0.548488
+0x3863
+// 0.690467
+0x3986
+// 0.181983
+0x31d3
+// 0.079159
+0x2d11
+// 0.073247
+0x2cb0
+// 0.125462
+0x3004
+// 1.095754
+0x3c62
+// 0.879620
+0x3b09
+// 0.226672
+0x3341
+// 0.136860
+0x3061
+// 0.112116
+0x2f2d
+// 0.102075
+0x2e88
+// 0.014319
+0x2355
+// 0.442023
+0x3713
+// 0.411355
+0x3695
+// 0.172214
+0x3183
+// 0.679620
+0x3970
+// 0.163681
+0x313d
+// 0.080295
+0x2d24
+// 0.218787
+0x3300
+// 0.218863
+0x3301
+// 0.019693
+0x250b
+// 0.161829
+0x312e
+// 1.006794
+0x3c07
+// 0.020873
+0x2558
+// 0.212565
+0x32cd
+// 0.219598
+0x3307
+// 0.371205
+0x35f0
+// 0.314268
+0x3507
+// 0.168850
+0x3167
+// 0.038782
+0x28f7
+// 0.065855
+0x2c37
+// 0.189367
+0x320f
+// 0.105295
+0x2ebd
+// 0.138100
+0x306b
+// 0.047815
+0x2a1f
+// 0.105360
+0x2ebe
+// 0.009128
+0x20ac
+// 0.072067
+0x2c9d
+// 0.159221
+0x3118
+// 0.168559
+0x3165
+// 0.286522
+0x3496
+// 0.090382
+0x2dc9
+// 0.171368
+0x317c
+// 0.069649
+0x2c75
+// 0.051694
+0x2a9e
+// 0.060467
+0x2bbd
+// 0.197961
+0x3256
+// 0.236952
+0x3395
+// 0.150644
+0x30d2
+// 0.092373
+0x2de9
+// 0.102823
+0x2e95
+// 0.050704
+0x2a7d
+// 0.668084
+0x3958
+// 0.229798
+0x335b
+// 0.734466
+0x39e0
+// 0.126984
+0x3010
+// 0.018033
+0x249e
+// 0.105157
+0x2ebb
+// 0.323522
+0x352d
+// 0.091246
+0x2dd7
+// 0.050391
+0x2a73
+// 0.155377
+0x30f9
+// 0.333444
+0x3556
+// 0.062637
+0x2c02
+// 0.282681
+0x3486
+// 0.180400
+0x31c6
+// 0.427018
+0x36d5
+// 0.099980
+0x2e66
+// 0.051888
+0x2aa4
+// 0.033948
+0x2858
+// 0.008216
+0x2035
+// 0.214116
+0x32da
+// 0.118417
+0x2f94
+// 0.060177
+0x2bb4
+// 0.010061
+0x2127
+// 0.200412
+0x326a
+// 0.478267
+0x37a7
+// 0.132127
+0x303a
+// 0.211228
+0x32c2
+// 0.312711
+0x3501
+// 0.014216
+0x2347
+// 0.478003
+0x37a6
+// 0.179688
+0x31c0
+// 0.008535
+0x205f
+// 0.000272
+0xc76
+// 0.151987
+0x30dd
+// 0.096705
+0x2e30
+// 0.258636
+0x3423
+// 0.199548
+0x3263
+// 0.085277
+0x2d75
+// 0.078650
+0x2d09
+// 0.138152
+0x306c
+// 0.053447
+0x2ad7
+// 0.219573
+0x3307
+// 0.001400
+0x15bc
+// 0.234124
+0x337e
+// 0.188633
+0x3209
+// 0.026963
+0x26e7
+// 0.341926
+0x3579
+// 0.070225
+0x2c7f
+// 0.138118
+0x306b
+// 0.438310
+0x3703
+// 0.321151
+0x3523
+// 0.246759
+0x33e5
+// 0.280232
+0x347c
+// 0.158311
+0x3111
+// 0.253470
+0x340e
+// 0.023762
+0x2615
+// 0.189928
+0x3214
+// 0.882917
+0x3b10
+// 0.120608
+0x2fb8
+// 0.040985
+0x293f
+// 0.696661
+0x3993
+// 0.273351
+0x3460
+// 0.291322
+0x34a9
+// 0.152722
+0x30e3
+// 0.072866
+0x2caa
+// 0.279324
+0x3478
+// 0.020198
+0x252c
+// 0.257383
+0x341e
+// 0.042770
+0x2979
+// 0.630333
+0x390b
+// 0.698322
+0x3996
+// 0.449275
+0x3730
+// 0.012992
+0x22a7
+// 0.223217
+0x3325
+// 0.330719
+0x354b
+// 0.536907
+0x384c
+// 0.350221
+0x359b
+// 0.298963
+0x34c9
+// 0.087928
+0x2da1
+// 0.068340
+0x2c60
+// 0.326342
+0x3539
+// 0.347147
+0x358e
+// 0.676312
+0x3969
+// 0.130565
+0x302e
+// 0.023634
+0x260d
+// 0.217791
+0x32f8
+// 0.154834
+0x30f4
+// 0.521151
+0x382b
+// 0.007407
+0x1f96
+// 0.109808
+0x2f07
+// 0.386380
+0x362f
+// 0.266481
+0x3444
+// 0.595129
+0x38c3
+// 0.176577
+0x31a7
+// 0.178291
+0x31b5
+// 0.114447
+0x2f53
+// 0.556312
+0x3873
+// 0.423058
+0x36c5
+// 0.303234
+0x34da
+// 0.170221
+0x3172
+// 0.083276
+0x2d54
+// 0.124988
+0x3000
+// 0.050738
+0x2a7f
+// 0.037746
+0x28d5
+// 0.300227
+0x34ce
+// 0.090583
+0x2dcc
+// 0.254164
+0x3411
+// 1.054516
+0x3c38
+// 0.175011
+0x319a
+// 0.393386
+0x364b
+// 0.053260
+0x2ad1
+// 0.021077
+0x2565
+// 0.147889
+0x30bc
+// 0.077537
+0x2cf6
+// 0.116791
+0x2f7a
+// 0.201558
+0x3273
+// 0.054525
+0x2afb
+// 0.052461
+0x2ab7
+// 0.068200
+0x2c5d
+// 0.339091
+0x356d
+// 0.045982
+0x29e3
+// 0.038135
+0x28e2
+// 0.178093
+0x31b3
+// 1.451062
+0x3dce
+// 0.464864
+0x3770
+// 0.011804
+0x220b
+// 0.002968
+0x1a14
+// 0.072789
+0x2ca9
+// 0.019172
+0x24e8
+// 0.145560
+0x30a8
+// 0.050686
+0x2a7d
+// 0.041670
+0x2955
+// 0.081591
+0x2d39
+// 0.272416
+0x345c
+// 0.449953
+0x3733
+// 0.362239
+0x35cc
+// 0.013746
+0x230a
+// 0.646934
+0x392d
+// 0.272603
+0x345d
+// 0.208196
+0x32aa
+// 0.106203
+0x2ecc
+// 0.002726
+0x1995
+// 0.222703
+0x3320
+// 0.384809
+0x3628
+// 0.033756
+0x2852
+// 0.375349
+0x3601
+// 0.132593
+0x303e
+// 0.271316
+0x3457
+// 0.481399
+0x37b4
+// 0.021541
+0x2584
+// 0.123010
+0x2fdf
+// 0.327660
+0x353e
+// 0.021302
+0x2574
+// 0.434565
+0x36f4
+// 0.054029
+0x2aea
+// 0.133889
+0x3049
+// 0.284771
+0x348e
+// 0.334612
+0x355b
+// 0.027864
+0x2722
+// 0.193338
+0x3230
+// 0.182744
+0x31d9
+// 0.213245
+0x32d3
+// 0.085367
+0x2d77
+// 0.212809
+0x32cf
+// 0.295428
+0x34ba
+// 0.189756
+0x3212
+// 0.056338
+0x2b36
+// 0.133997
+0x304a
+// 0.038607
+0x28f1
+// 0.000526
+0x104f
+// 0.129085
+0x3021
+// 0.167673
+0x315e
+// 0.052153
+0x2aad
+// 0.015447
+0x23e9
+// 0.189017
+0x320c
+// 0.396894
+0x365a
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference7_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference7_f16.txt
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference8_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference8_f16.txt
--- a/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference9_f16.txt
+++ b/Testing/Patterns/DSP/ComplexMaths/ComplexMathsF16/Reference9_f16.txt
@ -0,0 +1,6 @@
+H
+2
+// -0.368091
+0xb5e4
+// -0.861249
+0xbae4
--- a/Testing/Source/Tests/ComplexTestsF16.cpp
+++ b/Testing/Source/Tests/ComplexTestsF16.cpp
@ -0,0 +1,308 @@
+#include "ComplexTestsF16.h"
+#include <stdio.h>
+#include "Error.h"
+
+#define SNR_THRESHOLD 40
+
+#define REL_ERROR (6.0e-2)
+
+    // Runs arm_cmplx_conj_f16 on the samples held by input1, writes the
+    // result into output and compares it against the reference pattern ref.
+    void ComplexTestsF16::test_cmplx_conj_f16()
+    {
+        const float16_t *src = input1.ptr();
+        float16_t *dst = output.ptr();
+
+        // The kernel receives half of input1's scalar count
+        // (presumably interleaved re/im pairs -- confirm against the patterns).
+        arm_cmplx_conj_f16(src, dst, input1.nbSamples() >> 1);
+
+        // Checks are kept in their original order: tail, SNR, relative error.
+        ASSERT_EMPTY_TAIL(output);
+        ASSERT_SNR(output, ref, (float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(output, ref, REL_ERROR);
+    }
+
+
+    // Runs arm_cmplx_dot_prod_f16 on input1 and input2, stores the returned
+    // real and imaginary parts in the first two slots of output and compares
+    // output against the reference pattern ref.
+    void ComplexTestsF16::test_cmplx_dot_prod_f16()
+    {
+        const float16_t *srcA = input1.ptr();
+        const float16_t *srcB = input2.ptr();
+        float16_t *dst = output.ptr();
+
+        float16_t realPart, imagPart;
+
+        // The kernel receives half of input1's scalar count
+        // (presumably interleaved re/im pairs -- confirm against the patterns).
+        arm_cmplx_dot_prod_f16(srcA, srcB, input1.nbSamples() >> 1, &realPart, &imagPart);
+
+        dst[0] = realPart;
+        dst[1] = imagPart;
+
+        // Checks are kept in their original order: SNR, relative error, tail.
+        ASSERT_SNR(output, ref, (float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(output, ref, REL_ERROR);
+        ASSERT_EMPTY_TAIL(output);
+    }
+
+    // Runs arm_cmplx_mag_f16 on the samples held by input1, writes the
+    // result into output and compares it against the reference pattern ref.
+    void ComplexTestsF16::test_cmplx_mag_f16()
+    {
+        const float16_t *src = input1.ptr();
+        float16_t *dst = output.ptr();
+
+        // The kernel receives half of input1's scalar count
+        // (presumably interleaved re/im pairs -- confirm against the patterns).
+        arm_cmplx_mag_f16(src, dst, input1.nbSamples() >> 1);
+
+        // Checks are kept in their original order: tail, SNR, relative error.
+        ASSERT_EMPTY_TAIL(output);
+        ASSERT_SNR(output, ref, (float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(output, ref, REL_ERROR);
+    }
+
+    // Runs arm_cmplx_mag_squared_f16 on the samples held by input1, writes
+    // the result into output and compares it against the reference pattern ref.
+    void ComplexTestsF16::test_cmplx_mag_squared_f16()
+    {
+        const float16_t *src = input1.ptr();
+        float16_t *dst = output.ptr();
+
+        // The kernel receives half of input1's scalar count
+        // (presumably interleaved re/im pairs -- confirm against the patterns).
+        arm_cmplx_mag_squared_f16(src, dst, input1.nbSamples() >> 1);
+
+        // Checks are kept in their original order: tail, SNR, relative error.
+        ASSERT_EMPTY_TAIL(output);
+        ASSERT_SNR(output, ref, (float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(output, ref, REL_ERROR);
+    }
+
+    // Runs arm_cmplx_mult_cmplx_f16 on input1 and input2, writes the result
+    // into output and compares it against the reference pattern ref.
+    void ComplexTestsF16::test_cmplx_mult_cmplx_f16()
+    {
+        const float16_t *srcA = input1.ptr();
+        const float16_t *srcB = input2.ptr();
+        float16_t *dst = output.ptr();
+
+        // The kernel receives half of input1's scalar count
+        // (presumably interleaved re/im pairs -- confirm against the patterns).
+        arm_cmplx_mult_cmplx_f16(srcA, srcB, dst, input1.nbSamples() >> 1);
+
+        // Checks are kept in their original order: tail, SNR, relative error.
+        ASSERT_EMPTY_TAIL(output);
+        ASSERT_SNR(output, ref, (float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(output, ref, REL_ERROR);
+    }
+
+    // Runs arm_cmplx_mult_real_f16 on input1 and input2, writes the result
+    // into output and compares it against the reference pattern ref.
+    void ComplexTestsF16::test_cmplx_mult_real_f16()
+    {
+        const float16_t *cmplxSrc = input1.ptr();
+        const float16_t *realSrc = input2.ptr();
+        float16_t *dst = output.ptr();
+
+        // The kernel receives half of input1's scalar count
+        // (presumably interleaved re/im pairs -- confirm against the patterns).
+        arm_cmplx_mult_real_f16(cmplxSrc, realSrc, dst, input1.nbSamples() >> 1);
+
+        // Checks are kept in their original order: tail, SNR, relative error.
+        ASSERT_EMPTY_TAIL(output);
+        ASSERT_SNR(output, ref, (float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(output, ref, REL_ERROR);
+    }
+ 
+    // Prepares the patterns used by the test identified by `id`.
+    //
+    // For every known test id this reloads the reference pattern (ref), then
+    // the input pattern(s) (input1, and input2 where the kernel takes a second
+    // operand), and finally creates the output buffer with the same number of
+    // samples as ref. Unknown ids leave everything untouched.
+    //
+    // id     : test identifier selecting which patterns to load.
+    // params : unused.
+    // mgr    : pattern manager handed to every reload/create call.
+    void ComplexTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
+    {
+       static_cast<void>(params);
+
+       // Tests without an explicit size below keep MAX_NB_SAMPLES.
+       Testing::nbSamples_t nb = MAX_NB_SAMPLES;
+
+       // Step 1: select the sample count for the short tests.
+       switch(id)
+       {
+        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_1:
+        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_4:
+        case ComplexTestsF16::TEST_CMPLX_MAG_F16_7:
+        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_10:
+        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_13:
+        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_16:
+          nb = 7;
+          break;
+
+        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_2:
+        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_5:
+        case ComplexTestsF16::TEST_CMPLX_MAG_F16_8:
+        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_11:
+        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_14:
+        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_17:
+          nb = 16;
+          break;
+
+        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_3:
+        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_6:
+        case ComplexTestsF16::TEST_CMPLX_MAG_F16_9:
+        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_12:
+        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_15:
+        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_18:
+          nb = 23;
+          break;
+
+        default:
+          break;
+       }
+
+       // Step 2: load the reference first, then the inputs, in that order.
+       switch(id)
+       {
+        // Conjugate: reference and input both hold nb << 1 values.
+        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_1:
+        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_2:
+        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_3:
+        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_19:
+          ref.reload(ComplexTestsF16::REF_CONJ_F16_ID,mgr,nb << 1);
+          input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
+          break;
+
+        // Dot product: each size has its own reference pattern, reloaded
+        // without an explicit sample count.
+        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_4:
+          ref.reload(ComplexTestsF16::REF_DOT_PROD_3_F16_ID,mgr);
+          input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
+          input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
+          break;
+
+        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_5:
+          ref.reload(ComplexTestsF16::REF_DOT_PROD_4N_F16_ID,mgr);
+          input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
+          input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
+          break;
+
+        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_6:
+          ref.reload(ComplexTestsF16::REF_DOT_PROD_4N1_F16_ID,mgr);
+          input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
+          input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
+          break;
+
+        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_20:
+          ref.reload(ComplexTestsF16::REF_DOT_PROD_LONG_F16_ID,mgr);
+          input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
+          input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
+          break;
+
+        // Magnitude: reference holds nb values, input holds nb << 1.
+        case ComplexTestsF16::TEST_CMPLX_MAG_F16_7:
+        case ComplexTestsF16::TEST_CMPLX_MAG_F16_8:
+        case ComplexTestsF16::TEST_CMPLX_MAG_F16_9:
+        case ComplexTestsF16::TEST_CMPLX_MAG_F16_21:
+          ref.reload(ComplexTestsF16::REF_MAG_F16_ID,mgr,nb);
+          input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
+          break;
+
+        // Squared magnitude: reference holds nb values, input holds nb << 1.
+        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_10:
+        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_11:
+        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_12:
+        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_22:
+          ref.reload(ComplexTestsF16::REF_MAG_SQUARED_F16_ID,mgr,nb);
+          input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
+          break;
+
+        // Complex by complex: reference and both inputs hold nb << 1 values.
+        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_13:
+        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_14:
+        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_15:
+        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_23:
+          ref.reload(ComplexTestsF16::REF_CMPLX_MULT_CMPLX_F16_ID,mgr,nb << 1);
+          input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
+          input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
+          break;
+
+        // Complex by real: the second operand comes from INPUT3 with nb values.
+        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_16:
+        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_17:
+        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_18:
+        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_24:
+          ref.reload(ComplexTestsF16::REF_CMPLX_MULT_REAL_F16_ID,mgr,nb << 1);
+          input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
+          input2.reload(ComplexTestsF16::INPUT3_F16_ID,mgr,nb);
+          break;
+
+        // Unknown test id: nothing is loaded and no output is created.
+        default:
+          return;
+       }
+
+       // Step 3: the output buffer always mirrors the size of the reference.
+       output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
+    }
+
+    // Called after a test: dumps the output pattern through the pattern manager.
+    //
+    // id  : unused.
+    // mgr : pattern manager receiving the dump.
+    void ComplexTestsF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
+    {
+        static_cast<void>(id);
+
+        output.dump(mgr);
+    }
--- a/Testing/desc_f16.txt
+++ b/Testing/desc_f16.txt
@ -77,6 +77,69 @@ group Root {

        }

+        group Complex Tests {
+           class = ComplexTests
+           folder = ComplexMaths
+
+           suite Complex Tests F16{
+              class = ComplexTestsF16
+              folder = ComplexMathsF16
+
+              Pattern INPUT1_F16_ID : Input1_f16.txt 
+              Pattern INPUT2_F16_ID : Input2_f16.txt 
+              Pattern INPUT3_F16_ID : Input3_f16.txt 
+
+              Pattern REF_CONJ_F16_ID : Reference1_f16.txt
+              Pattern REF_DOT_PROD_3_F16_ID : Reference2_f16.txt
+              Pattern REF_DOT_PROD_4N_F16_ID : Reference3_f16.txt
+              Pattern REF_DOT_PROD_4N1_F16_ID : Reference4_f16.txt
+              Pattern REF_MAG_F16_ID : Reference5_f16.txt
+              Pattern REF_MAG_SQUARED_F16_ID : Reference6_f16.txt
+              Pattern REF_CMPLX_MULT_CMPLX_F16_ID : Reference7_f16.txt
+              Pattern REF_CMPLX_MULT_REAL_F16_ID : Reference8_f16.txt
+              Pattern REF_DOT_PROD_LONG_F16_ID : Reference9_f16.txt
+
+              Output  OUT_SAMPLES_F16_ID : Output
+              Output  OUT_STATE_F16_ID : State
+
+              Functions {
+                Test nb=3    arm_cmplx_conj_f16:test_cmplx_conj_f16
+                Test nb=4n   arm_cmplx_conj_f16:test_cmplx_conj_f16
+                Test nb=4n+1 arm_cmplx_conj_f16:test_cmplx_conj_f16
+
+                Test nb=3    arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
+                Test nb=4n   arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
+                Test nb=4n+1 arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
+
+                Test nb=3    arm_cmplx_mag_f16:test_cmplx_mag_f16
+                Test nb=4n   arm_cmplx_mag_f16:test_cmplx_mag_f16
+                Test nb=4n+1 arm_cmplx_mag_f16:test_cmplx_mag_f16
+
+                Test nb=3    arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
+                Test nb=4n   arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
+                Test nb=4n+1 arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
+
+                Test nb=3    arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
+                Test nb=4n   arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
+                Test nb=4n+1 arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
+
+                Test nb=3    arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
+                Test nb=4n   arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
+                Test nb=4n+1 arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
+
+                Test long    arm_cmplx_conj_f16:test_cmplx_conj_f16
+                Test long    arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
+                Test long    arm_cmplx_mag_f16:test_cmplx_mag_f16
+                Test long    arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
+                Test long    arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
+                Test long    arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
+
+              }
+
+           }
+
+        }
+
        group Transform Tests {
           class = TransformTests
           folder = Transform