CMSIS-DSP: Added additional f16 statistics functions

and the required f16 fast math functions.
6 years ago · 55c9be8af0
parent 534c34f883
commit 55c9be8af0
70 changed files with 4423 additions and 81 deletions
--- a/Include/arm_common_tables_f16.h
+++ b/Include/arm_common_tables_f16.h
@ -113,6 +113,14 @@ extern "C"
    
 #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */

+#if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
+
+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
+       extern const float16_t exp_tab_f16[8];
+       extern const float16_t __logf_lut_f16[8];
+#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */
+#endif 
+       

 #ifdef   __cplusplus
 }
--- a/Include/arm_math_types_f16.h
+++ b/Include/arm_math_types_f16.h
@ -140,7 +140,7 @@ won't be built.
 #define F16_ABSMAX   ((float16_t)FLT_MAX)
 #define F16_ABSMIN   ((float16_t)0.0)

-
+#define F16INFINITY ((float16_t)0x07c00)
  
 #endif /* ARM_FLOAT16_SUPPORTED*/
 #endif /* !defined( __CC_ARM ) */
--- a/Include/arm_vec_math_f16.h
+++ b/Include/arm_vec_math_f16.h
@ -0,0 +1,232 @@
+/******************************************************************************
+ * @file     arm_vec_math_f16.h
+ * @brief    Public header file for CMSIS DSP Library
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _ARM_VEC_MATH_H
+#define _ARM_VEC_MATH_H
+
+#include "arm_math_types_f16.h"
+#include "arm_common_tables_f16.h"
+#include "arm_helium_utils.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+
+static const float16_t __logf_rng_f16=0.693147180f16;
+
+
+/* fast inverse approximation (4x newton) */
+__STATIC_INLINE f16x8_t vrecip_hiprec_f16(
+    f16x8_t x)
+{
+    q15x8_t         m;
+    f16x8_t         b;
+    any16x8_t       xinv;
+    f16x8_t         ax = vabsq(x);
+
+    xinv.f = ax;
+
+    m = 0x03c00 - (xinv.i & 0x07c00);
+    xinv.i = xinv.i + m;
+    xinv.f = 1.41176471f16 - 0.47058824f16 * xinv.f;
+    xinv.i = xinv.i + m;
+
+    b = 2.0f16 - xinv.f * ax;
+    xinv.f = xinv.f * b;
+
+    b = 2.0f16 - xinv.f * ax;
+    xinv.f = xinv.f * b;
+
+    b = 2.0f16 - xinv.f * ax;
+    xinv.f = xinv.f * b;
+
+    b = 2.0f16 - xinv.f * ax;
+    xinv.f = xinv.f * b;
+
+    xinv.f = vdupq_m(xinv.f, F16INFINITY, vcmpeqq(x, 0.0f));
+    /*
+     * restore sign
+     */
+    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
+
+    return xinv.f;
+}
+
+__STATIC_INLINE f16x8_t vdiv_f16(
+    f16x8_t num, f16x8_t den)
+{
+    return vmulq(num, vrecip_hiprec_f16(den));
+}
+
+
+/**
+  @brief         Single-precision taylor dev.
+  @param[in]     x              f16  vector input
+  @param[in]     coeffs         f16  vector coeffs
+  @return        destination    f16  vector
+ */
+
+__STATIC_INLINE float16x8_t vtaylor_polyq_f16(
+        float16x8_t           x,
+        const float16_t * coeffs)
+{
+    float16x8_t         A = vfmasq(vdupq_n_f16(coeffs[4]), x, coeffs[0]);
+    float16x8_t         B = vfmasq(vdupq_n_f16(coeffs[6]), x, coeffs[2]);
+    float16x8_t         C = vfmasq(vdupq_n_f16(coeffs[5]), x, coeffs[1]);
+    float16x8_t         D = vfmasq(vdupq_n_f16(coeffs[7]), x, coeffs[3]);
+    float16x8_t         x2 = vmulq(x, x);
+    float16x8_t         x4 = vmulq(x2, x2);
+    float16x8_t         res = vfmaq(vfmaq_f16(A, B, x2), vfmaq_f16(C, D, x2), x4);
+
+    return res;
+}
+
+__STATIC_INLINE float16x8_t vmant_exp_f16(
+    float16x8_t     x,
+    int16x8_t * e)
+{
+    any16x8_t       r;
+    int16x8_t       n;
+
+    r.f = x;
+    n = r.i >> 10;
+    n = n - 15;
+    r.i = r.i - (n << 10);
+
+    *e = n;
+    return r.f;
+}
+
+
+__STATIC_INLINE float16x8_t vlogq_f16(float16x8_t vecIn)
+{
+    q15x8_t             vecExpUnBiased;
+    float16x8_t         vecTmpFlt0, vecTmpFlt1;
+    float16x8_t         vecAcc0, vecAcc1, vecAcc2, vecAcc3;
+    float16x8_t         vecExpUnBiasedFlt;
+
+    /*
+     * extract exponent
+     */
+    vecTmpFlt1 = vmant_exp_f16(vecIn, &vecExpUnBiased);
+
+    vecTmpFlt0 = vecTmpFlt1 * vecTmpFlt1;
+    /*
+     * a = (__logf_lut_f16[4] * r.f) + (__logf_lut_f16[0]);
+     */
+    vecAcc0 = vdupq_n_f16(__logf_lut_f16[0]);
+    vecAcc0 = vfmaq(vecAcc0, vecTmpFlt1, __logf_lut_f16[4]);
+    /*
+     * b = (__logf_lut_f16[6] * r.f) + (__logf_lut_f16[2]);
+     */
+    vecAcc1 = vdupq_n_f16(__logf_lut_f16[2]);
+    vecAcc1 = vfmaq(vecAcc1, vecTmpFlt1, __logf_lut_f16[6]);
+    /*
+     * c = (__logf_lut_f16[5] * r.f) + (__logf_lut_f16[1]);
+     */
+    vecAcc2 = vdupq_n_f16(__logf_lut_f16[1]);
+    vecAcc2 = vfmaq(vecAcc2, vecTmpFlt1, __logf_lut_f16[5]);
+    /*
+     * d = (__logf_lut_f16[7] * r.f) + (__logf_lut_f16[3]);
+     */
+    vecAcc3 = vdupq_n_f16(__logf_lut_f16[3]);
+    vecAcc3 = vfmaq(vecAcc3, vecTmpFlt1, __logf_lut_f16[7]);
+    /*
+     * a = a + b * xx;
+     */
+    vecAcc0 = vfmaq(vecAcc0, vecAcc1, vecTmpFlt0);
+    /*
+     * c = c + d * xx;
+     */
+    vecAcc2 = vfmaq(vecAcc2, vecAcc3, vecTmpFlt0);
+    /*
+     * xx = xx * xx;
+     */
+    vecTmpFlt0 = vecTmpFlt0 * vecTmpFlt0;
+    vecExpUnBiasedFlt = vcvtq_f16_s16(vecExpUnBiased);
+    /*
+     * r.f = a + c * xx;
+     */
+    vecAcc0 = vfmaq(vecAcc0, vecAcc2, vecTmpFlt0);
+    /*
+     * add exponent
+     * r.f = r.f + ((float32_t) m) * __logf_rng_f16;
+     */
+    vecAcc0 = vfmaq(vecAcc0, vecExpUnBiasedFlt, __logf_rng_f16);
+    // set log0 down to -inf
+    vecAcc0 = vdupq_m(vecAcc0, -F16INFINITY, vcmpeqq(vecIn, 0.0f));
+    return vecAcc0;
+}
+
+__STATIC_INLINE float16x8_t vexpq_f16(
+    float16x8_t x)
+{
+    // Perform range reduction [-log(2),log(2)]
+    int16x8_t       m = vcvtq_s16_f16(vmulq_n_f16(x, 1.4426950408f16));
+    float16x8_t     val = vfmsq_f16(x, vcvtq_f16_s16(m), vdupq_n_f16(0.6931471805f16));
+
+    // Polynomial Approximation
+    float16x8_t         poly = vtaylor_polyq_f16(val, exp_tab_f16);
+
+    // Reconstruct
+    poly = (float16x8_t) (vqaddq_s16((int16x8_t) (poly), vqshlq_n_s16(m, 10)));
+
+    poly = vdupq_m(poly, 0.0f, vcmpltq_n_s16(m, -14));
+    return poly;
+}
+
+__STATIC_INLINE float16x8_t arm_vec_exponent_f16(float16x8_t x, int16_t nb)
+{
+    float16x8_t         r = x;
+    nb--;
+    while (nb > 0) {
+        r = vmulq(r, x);
+        nb--;
+    }
+    return (r);
+}
+
+
+
+#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)*/
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* ARM FLOAT16 SUPPORTED */
+
+#endif /* _ARM_VEC_MATH_H */
+
+/**
+ *
+ * End of file.
+ */
--- a/Include/dsp/basic_math_functions_f16.h
+++ b/Include/dsp/basic_math_functions_f16.h
@ -36,7 +36,6 @@ extern "C"

 #include "dsp/none.h"
 #include "dsp/utils.h"
-#include "dsp/fast_math_functions_f16.h"


 #if defined(ARM_FLOAT16_SUPPORTED)
--- a/Include/dsp/fast_math_functions_f16.h
+++ b/Include/dsp/fast_math_functions_f16.h
@ -31,6 +31,8 @@

 #include "dsp/none.h"
 #include "dsp/utils.h"
+
+/* For sqrt_f32 */
 #include "dsp/fast_math_functions.h"

 #ifdef   __cplusplus
@ -69,6 +71,42 @@ __STATIC_FORCEINLINE arm_status arm_sqrt_f16(
  @} end of SQRT group
 */
  
+/**
+  @brief         Floating-point vector of log values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void arm_vlog_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+/**
+  @brief         Floating-point vector of exp values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void arm_vexp_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+  @brief         Floating-point vector of inverse values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void arm_vinverse_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
--- a/Include/dsp/statistics_functions_f16.h
+++ b/Include/dsp/statistics_functions_f16.h
@ -123,6 +123,65 @@ extern "C"
        float16_t * pResult,
        uint32_t * pIndex);

+/**
+ * @brief Entropy
+ *
+ * @param[in]  pSrcA        Array of input values.
+ * @param[in]  blockSize    Number of samples in the input array.
+ * @return     Entropy      -Sum(p ln p)
+ *
+ */
+
+
+float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize);
+
+float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize);
+
+/**
+ * @brief Dot product with log arithmetic
+ *
+ * Vectors are containing the log of the samples
+ *
+ * @param[in]       pSrcA points to the first input vector
+ * @param[in]       pSrcB points to the second input vector
+ * @param[in]       blockSize number of samples in each vector
+ * @param[in]       pTmpBuffer temporary buffer of length blockSize
+ * @return The log of the dot product .
+ *
+ */
+
+
+float16_t arm_logsumexp_dot_prod_f16(const float16_t * pSrcA,
+  const float16_t * pSrcB,
+  uint32_t blockSize,
+  float16_t *pTmpBuffer);
+
+/**
+ * @brief Kullback-Leibler
+ *
+ * @param[in]  pSrcA         Pointer to an array of input values for probability distribution A.
+ * @param[in]  pSrcB         Pointer to an array of input values for probability distribution B.
+ * @param[in]  blockSize     Number of samples in the input array.
+ * @return Kullback-Leibler  Divergence D(A || B)
+ *
+ */
+float16_t arm_kullback_leibler_f16(const float16_t * pSrcA
+  ,const float16_t * pSrcB
+  ,uint32_t blockSize);
+
+/**
+    @brief         Maximum value of a floating-point vector.
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    maximum value returned here
+    @return        none
+   */
+  void arm_max_no_idx_f16(
+      const float16_t *pSrc,
+      uint32_t   blockSize,
+      float16_t *pResult);
+
+

 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
--- a/Source/CommonTables/arm_common_tables_f16.c
+++ b/Source/CommonTables/arm_common_tables_f16.c
@ -12550,6 +12550,32 @@ const float16_t twiddleCoefF16_rfft_4096[4096] = {

 #endif /*!defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)*/

+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
+const float16_t exp_tab_f16[8] = {
+    (1.f16),
+    (0.0416598916054f16),
+    (0.500000596046f16),
+    (0.00138889f16),
+    (1.00000011921f16),
+    (0.00833693705499f16),
+    (0.166665703058f16),
+    (0.000195780929062f16),
+};
+
+const float16_t __logf_lut_f16[8] = {
+    -2.295614848256274f16,         /*p0*/
+    -2.470711633419806f16,         /*p4*/
+    -5.686926051100417f16,         /*p2*/
+    -0.165253547131978f16,         /*p6*/
+    +5.175912446351073f16,         /*p1*/
+    +0.844006986174912f16,         /*p5*/
+    +4.584458825456749f16,         /*p3*/
+    +0.014127821926000f16          /*p7*/
+};
+
+#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

 #endif /* Not ARM AC5 */
--- a/Source/FastMathFunctions/CMakeLists.txt
+++ b/Source/FastMathFunctions/CMakeLists.txt
@ -47,6 +47,12 @@ target_sources(CMSISDSPFastMath PRIVATE arm_sqrt_q31.c)
 target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f32.c)
 target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f32.c)

+if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
+target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f16.c)
+target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f16.c)
+target_sources(CMSISDSPFastMath PRIVATE arm_vinverse_f16.c)
+endif()
+

 ### Includes
 target_include_directories(CMSISDSPFastMath PUBLIC "${DSP}/Include")
--- a/Source/FastMathFunctions/FastMathFunctionsF16.c
+++ b/Source/FastMathFunctions/FastMathFunctionsF16.c
@ -0,0 +1,31 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        FastMathFunctions.c
+ * Description:  Combination of all fast math function source files.
+ *
+ * $Date:        16. March 2020
+ * $Revision:    V1.1.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_vexp_f16.c"
+#include "arm_vlog_f16.c"
+#include "arm_vinverse_f16.c"
--- a/Source/FastMathFunctions/arm_vexp_f16.c
+++ b/Source/FastMathFunctions/arm_vexp_f16.c
@ -0,0 +1,84 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_vlog_f16.c
+ * Description:  Fast vectorized log
+ *
+ * $Date:        15. Octoboer 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/fast_math_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include "arm_common_tables.h"
+
+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include "arm_vec_math_f16.h"
+#endif
+
+
+void arm_vexp_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize)
+{
+   uint32_t blkCnt; 
+
+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+   f16x8_t src;
+   f16x8_t dst;
+
+   blkCnt = blockSize >> 3;
+
+   while (blkCnt > 0U)
+   {
+      src = vld1q(pSrc);
+      dst = vexpq_f16(src);
+      vst1q(pDst, dst);
+
+      pSrc += 8;
+      pDst += 8;
+      /* Decrement loop counter */
+      blkCnt--;
+   }
+
+   blkCnt = blockSize & 7;
+#else
+   blkCnt = blockSize;
+#endif
+
+   while (blkCnt > 0U)
+   {
+      /* C = log(A) */
+  
+      /* Calculate log and store result in destination buffer. */
+      *pDst++ = expf(*pSrc++);
+  
+      /* Decrement loop counter */
+      blkCnt--;
+   }
+}
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/FastMathFunctions/arm_vinverse_f16.c
+++ b/Source/FastMathFunctions/arm_vinverse_f16.c
@ -0,0 +1,81 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_vinverse_f16.c
+ * Description:  Fast vectorized inverse
+ *
+ * $Date:        15. Octoboer 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/fast_math_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include "arm_common_tables.h"
+
+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include "arm_vec_math_f16.h"
+#endif
+
+void arm_vinverse_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize)
+{
+   uint32_t blkCnt; 
+
+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+   f16x8_t src;
+   f16x8_t dst;
+
+   blkCnt = blockSize >> 3;
+
+   while (blkCnt > 0U)
+   {
+      src = vld1q(pSrc);
+      dst = vrecip_hiprec_f16(src);
+      vst1q(pDst, dst);
+
+      pSrc += 8;
+      pDst += 8;
+      /* Decrement loop counter */
+      blkCnt--;
+   }
+
+   blkCnt = blockSize & 7;
+#else
+   blkCnt = blockSize;
+#endif
+
+   while (blkCnt > 0U)
+   {
+      
+      *pDst++ = 1.0 / *pSrc++;
+  
+      /* Decrement loop counter */
+      blkCnt--;
+   }
+}
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/FastMathFunctions/arm_vlog_f16.c
+++ b/Source/FastMathFunctions/arm_vlog_f16.c
@ -0,0 +1,83 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_vlog_f16.c
+ * Description:  Fast vectorized log
+ *
+ * $Date:        15. Octoboer 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/fast_math_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include "arm_common_tables.h"
+
+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include "arm_vec_math_f16.h"
+#endif
+
+void arm_vlog_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize)
+{
+   uint32_t blkCnt; 
+
+#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+   f16x8_t src;
+   f16x8_t dst;
+
+   blkCnt = blockSize >> 3;
+
+   while (blkCnt > 0U)
+   {
+      src = vld1q(pSrc);
+      dst = vlogq_f16(src);
+      vst1q(pDst, dst);
+
+      pSrc += 8;
+      pDst += 8;
+      /* Decrement loop counter */
+      blkCnt--;
+   }
+
+   blkCnt = blockSize & 7;
+#else
+   blkCnt = blockSize;
+#endif
+
+   while (blkCnt > 0U)
+   {
+      /* C = log(A) */
+  
+      /* Calculate log and store result in destination buffer. */
+      *pDst++ = logf(*pSrc++);
+  
+      /* Decrement loop counter */
+      blkCnt--;
+   }
+}
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/SVMFunctions/arm_svm_linear_init_f32.c
+++ b/Source/SVMFunctions/arm_svm_linear_init_f32.c
@ -33,9 +33,18 @@
 *
 */

+/**
+  @ingroup groupSVM
+ */
+
+/**
+  @defgroup linearsvm Linear SVM
+
+  Linear SVM classifier
+ */

 /**
- * @addtogroup groupSVM
+ * @addtogroup linearsvm
 * @{
 */

@ -77,5 +86,5 @@ void arm_svm_linear_init_f32(arm_svm_linear_instance_f32 *S,


 /**
- * @} end of groupSVM group
+ * @} end of linearsvm group
 */
--- a/Source/SVMFunctions/arm_svm_linear_predict_f32.c
+++ b/Source/SVMFunctions/arm_svm_linear_predict_f32.c
@ -30,7 +30,7 @@


 /**
- * @addtogroup groupSVM
+ * @addtogroup linearsvm
 * @{
 */

@ -449,5 +449,5 @@ void arm_svm_linear_predict_f32(
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
- * @} end of groupSVM group
+ * @} end of linearsvm group
 */
--- a/Source/SVMFunctions/arm_svm_polynomial_init_f32.c
+++ b/Source/SVMFunctions/arm_svm_polynomial_init_f32.c
@ -28,12 +28,20 @@
 #include <limits.h>
 #include <math.h>

+/**
+  @ingroup groupSVM
+ */

 /**
- * @addtogroup groupSVM
- * @{
+  @defgroup polysvm Polynomial SVM
+
+  Polynomial SVM classifier
 */

+/**
+ * @addtogroup polysvm
+ * @{
+ */


 /**
@ -83,5 +91,5 @@ void arm_svm_polynomial_init_f32(arm_svm_polynomial_instance_f32 *S,


 /**
- * @} end of groupSVM group
+ * @} end of polysvm group
 */
--- a/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c
+++ b/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c
@ -33,7 +33,7 @@
 #endif

 /**
- * @addtogroup groupSVM
+ * @addtogroup polysvm
 * @{
 */

@ -484,5 +484,5 @@ void arm_svm_polynomial_predict_f32(


 /**
- * @} end of groupSVM group
+ * @} end of polysvm group
 */
--- a/Source/SVMFunctions/arm_svm_rbf_init_f32.c
+++ b/Source/SVMFunctions/arm_svm_rbf_init_f32.c
@ -28,9 +28,19 @@
 #include <limits.h>
 #include <math.h>

+/**
+  @ingroup groupSVM
+ */
+
+/**
+  @defgroup rbfsvm RBF SVM
+
+  RBF SVM classifier
+ */
+

 /**
- * @addtogroup groupSVM
+ * @addtogroup rbfsvm
 * @{
 */

@ -75,5 +85,5 @@ void arm_svm_rbf_init_f32(arm_svm_rbf_instance_f32 *S,


 /**
- * @} end of groupSVM group
+ * @} end of rbfsvm group
 */
--- a/Source/SVMFunctions/arm_svm_rbf_predict_f32.c
+++ b/Source/SVMFunctions/arm_svm_rbf_predict_f32.c
@ -30,7 +30,7 @@


 /**
- * @addtogroup groupSVM
+ * @addtogroup rbfsvm
 * @{
 */

@ -517,5 +517,5 @@ void arm_svm_rbf_predict_f32(
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
- * @} end of groupSVM group
+ * @} end of rbfsvm group
 */
--- a/Source/SVMFunctions/arm_svm_sigmoid_init_f32.c
+++ b/Source/SVMFunctions/arm_svm_sigmoid_init_f32.c
@ -28,9 +28,18 @@
 #include <limits.h>
 #include <math.h>

+/**
+  @ingroup groupSVM
+ */
+
+/**
+  @defgroup sigmoidsvm Sigmoid SVM
+
+  Sigmoid SVM classifier
+ */

 /**
- * @addtogroup groupSVM
+ * @addtogroup sigmoidsvm
 * @{
 */

@ -77,5 +86,5 @@ void arm_svm_sigmoid_init_f32(arm_svm_sigmoid_instance_f32 *S,


 /**
- * @} end of groupSVM group
+ * @} end of sigmoidsvm group
 */
--- a/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c
+++ b/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c
@ -29,7 +29,7 @@
 #include <math.h>

 /**
- * @addtogroup groupSVM
+ * @addtogroup sigmoidsvm
 * @{
 */

@ -481,5 +481,5 @@ void arm_svm_sigmoid_predict_f32(
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
- * @} end of groupSVM group
+ * @} end of sigmoidsvm group
 */
--- a/Source/StatisticsFunctions/CMakeLists.txt
+++ b/Source/StatisticsFunctions/CMakeLists.txt
@ -59,4 +59,9 @@ target_sources(CMSISDSPStatistics PRIVATE arm_power_f16.c)
 target_sources(CMSISDSPStatistics PRIVATE arm_rms_f16.c)
 target_sources(CMSISDSPStatistics PRIVATE arm_std_f16.c)
 target_sources(CMSISDSPStatistics PRIVATE arm_var_f16.c)
+target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f16.c)
+target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f16.c)
+target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f16.c)
+target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f16.c)
+target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f16.c)
 endif()
--- a/Source/StatisticsFunctions/StatisticsFunctionsF16.c
+++ b/Source/StatisticsFunctions/StatisticsFunctionsF16.c
@ -33,3 +33,8 @@
 #include "arm_rms_f16.c"
 #include "arm_std_f16.c"
 #include "arm_var_f16.c"
+#include "arm_entropy_f16.c"
+#include "arm_kullback_leibler_f16.c"
+#include "arm_logsumexp_dot_prod_f16.c"
+#include "arm_logsumexp_f16.c"
+#include "arm_max_no_idx_f16.c"
--- a/Source/StatisticsFunctions/arm_entropy_f16.c
+++ b/Source/StatisticsFunctions/arm_entropy_f16.c
@ -0,0 +1,138 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_logsumexp_f16.c
+ * Description:  LogSumExp
+ *
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup Entropy Entropy
+
+  Computes the entropy of a distribution
+
+ */
+
+/**
+ * @addtogroup Entropy
+ * @{
+ */
+
+
+/**
+ * @brief Entropy
+ *
+ * @param[in]  pSrcA        Array of input values.
+ * @param[in]  blockSize    Number of samples in the input array.
+ * @return     Entropy      -Sum(p ln p)
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
+{
+    uint32_t        blkCnt;
+    float16_t       accum=0.0f,p;
+
+
+    blkCnt = blockSize;
+
+    f16x8_t         vSum = vdupq_n_f16(0.0f);
+    /* Compute 4 outputs at a time */
+    blkCnt = blockSize >> 3U;
+
+    while (blkCnt > 0U)
+    {
+        f16x8_t         vecIn = vld1q(pSrcA);
+
+        vSum = vaddq_f16(vSum, vmulq(vecIn, vlogq_f16(vecIn)));
+
+        /*
+         * Decrement the blockSize loop counter
+         * Advance vector source and destination pointers
+         */
+        pSrcA += 8;
+        blkCnt --;
+    }
+
+    accum = vecAddAcrossF16Mve(vSum);
+
+    /* Tail */
+    blkCnt = blockSize & 0x7;
+    while(blkCnt > 0)
+    {
+       p = *pSrcA++;
+       accum += p * logf(p);
+       
+       blkCnt--;
+    
+    }
+
+    return (-accum);
+}
+
+#else
+
+float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
+{
+    const float16_t *pIn;
+    uint32_t blkCnt;
+    float16_t accum, p;
+ 
+    pIn = pSrcA;
+    blkCnt = blockSize;
+
+    accum = 0.0f;
+
+    while(blkCnt > 0)
+    {
+       p = *pIn++;
+       accum += p * logf(p);
+       
+       blkCnt--;
+    
+    }
+
+    return(-accum);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+ * @} end of Entropy group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_entropy_f32.c
+++ b/Source/StatisticsFunctions/arm_entropy_f32.c
@ -30,7 +30,7 @@


 /**
- * @addtogroup groupStats
+ * @addtogroup Entropy
 * @{
 */

@ -168,5 +168,5 @@ float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize)
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
- * @} end of groupStats group
+ * @} end of Entropy group
 */
--- a/Source/StatisticsFunctions/arm_entropy_f64.c
+++ b/Source/StatisticsFunctions/arm_entropy_f64.c
@ -29,7 +29,7 @@
 #include <math.h>

 /**
- * @addtogroup groupStats
+ * @addtogroup Entropy
 * @{
 */

@ -67,5 +67,5 @@ float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize)
 }

 /**
- * @} end of groupStats group
+ * @} end of Entropy group
 */
--- a/Source/StatisticsFunctions/arm_kullback_leibler_f16.c
+++ b/Source/StatisticsFunctions/arm_kullback_leibler_f16.c
@ -0,0 +1,150 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_logsumexp_f16.c
+ * Description:  LogSumExp
+ *
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup Kullback-Leibler Kullback-Leibler divergence
+
+  Computes the Kullback-Leibler divergence between two distributions
+
+ */
+
+
+/**
+ * @addtogroup Kullback-Leibler
+ * @{
+ */
+
+
+/**
+ * @brief Kullback-Leibler
+ *
+ * Distribution A may contain 0 with Neon version.
+ * Result will be right but some exception flags will be set.
+ *
+ * Distribution B must not contain 0 probability.
+ *
+ * @param[in]  *pSrcA         points to an array of input values for probaility distribution A.
+ * @param[in]  *pSrcB         points to an array of input values for probaility distribution B.
+ * @param[in]  blockSize      number of samples in the input array.
+ * @return Kullback-Leibler divergence D(A || B)
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize)
+{
+    uint32_t blkCnt;
+    float16_t accum, pA,pB;
+ 
+    
+    blkCnt = blockSize;
+
+    accum = 0.0f;
+
+    f16x8_t         vSum = vdupq_n_f16(0.0f);
+    blkCnt = blockSize >> 3;
+    while(blkCnt > 0)
+    {
+        f16x8_t         vecA = vld1q(pSrcA);
+        f16x8_t         vecB = vld1q(pSrcB);
+        f16x8_t         vRatio;
+
+        vRatio = vdiv_f16(vecB, vecA);
+        vSum = vaddq_f16(vSum, vmulq(vecA, vlogq_f16(vRatio)));
+
+        /*
+         * Decrement the blockSize loop counter
+         * Advance vector source and destination pointers
+         */
+        pSrcA += 8;
+        pSrcB += 8;
+        blkCnt --;
+    }
+
+    accum = vecAddAcrossF16Mve(vSum);
+
+    blkCnt = blockSize & 7;
+    while(blkCnt > 0)
+    {
+       pA = *pSrcA++;
+       pB = *pSrcB++;
+       accum += pA * logf(pB / pA);
+       
+       blkCnt--;
+    
+    }
+
+    return(-accum);
+}
+
+#else
+float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize)
+{
+    const float16_t *pInA, *pInB;
+    uint32_t blkCnt;
+    float16_t accum, pA,pB;
+ 
+    pInA = pSrcA;
+    pInB = pSrcB;
+    blkCnt = blockSize;
+
+    accum = 0.0f;
+
+    while(blkCnt > 0)
+    {
+       pA = *pInA++;
+       pB = *pInB++;
+       accum += pA * logf(pB / pA);
+       
+       blkCnt--;
+    
+    }
+
+    return(-accum);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+ * @} end of Kullback-Leibler group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_kullback_leibler_f32.c
+++ b/Source/StatisticsFunctions/arm_kullback_leibler_f32.c
@ -30,7 +30,7 @@


 /**
- * @addtogroup groupStats
+ * @addtogroup Kullback-Leibler
 * @{
 */

@ -187,5 +187,5 @@ float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSr
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
- * @} end of groupStats group
+ * @} end of Kullback-Leibler group
 */
--- a/Source/StatisticsFunctions/arm_kullback_leibler_f64.c
+++ b/Source/StatisticsFunctions/arm_kullback_leibler_f64.c
@ -29,7 +29,7 @@
 #include <math.h>

 /**
- * @addtogroup groupStats
+ * @addtogroup Kullback-Leibler
 * @{
 */

@ -69,5 +69,5 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA, const float64_t * pS
 }

 /**
- * @} end of groupStats group
+ * @} end of Kullback-Leibler group
 */
--- a/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c
+++ b/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c
@ -0,0 +1,82 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_logsumexp_f16.c
+ * Description:  LogSumExp
+ *
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup LogSumExp LogSumExp
+
+  LogSumExp optimizations to compute sum of probabilities with Gaussian distributions
+
+ */
+
+/**
+ * @addtogroup LogSumExp
+ * @{
+ */
+
+
+/**
+ * @brief Dot product with log arithmetic
+ *
+ * Vectors are containing the log of the samples
+ *
+ * @param[in]       *pSrcA points to the first input vector
+ * @param[in]       *pSrcB points to the second input vector
+ * @param[in]       blockSize number of samples in each vector
+ * @param[in]       *pTmpBuffer temporary buffer of length blockSize
+ * @return The log of the dot product.
+ *
+ */
+
+
+float16_t arm_logsumexp_dot_prod_f16(const float16_t * pSrcA,
+  const float16_t * pSrcB,
+  uint32_t blockSize,
+  float16_t *pTmpBuffer)
+{
+    float16_t result;
+    arm_add_f16((float16_t*)pSrcA, (float16_t*)pSrcB, pTmpBuffer, blockSize);
+
+    result = arm_logsumexp_f16(pTmpBuffer, blockSize);
+    return(result);
+}
+
+/**
+ * @} end of LogSumExp group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c
+++ b/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c
@ -30,7 +30,7 @@


 /**
- * @addtogroup groupStats
+ * @addtogroup LogSumExp
 * @{
 */

@ -62,5 +62,5 @@ float32_t arm_logsumexp_dot_prod_f32(const float32_t * pSrcA,
 }

 /**
- * @} end of groupStats group
+ * @} end of LogSumExp group
 */
--- a/Source/StatisticsFunctions/arm_logsumexp_f16.c
+++ b/Source/StatisticsFunctions/arm_logsumexp_f16.c
@ -0,0 +1,170 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_logsumexp_f16.c
+ * Description:  LogSumExp
+ *
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+ * @addtogroup LogSumExp
+ * @{
+ */
+
+
+/**
+ * @brief Computation of the LogSumExp
+ *
+ * In probabilistic computations, the dynamic of the probability values can be very
+ * wide because they come from gaussian functions.
+ * To avoid underflow and overflow issues, the values are represented by their log.
+ * In this representation, multiplying the original exp values is easy : their logs are added.
+ * But adding the original exp values is requiring some special handling and it is the
+ * goal of the LogSumExp function.
+ *
+ * If the values are x1...xn, the function is computing:
+ *
+ * ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that
+ * rounding issues are minimised.
+ *
+ * The max xm of the values is extracted and the function is computing:
+ * xm + ln(exp(x1 - xm) + ... + exp(xn - xm))
+ *
+ * @param[in]  *in         Pointer to an array of input values.
+ * @param[in]  blockSize   Number of samples in the input array.
+ * @return LogSumExp
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
+{
+    float16_t       maxVal;
+    const float16_t *pIn;
+    int32_t         blkCnt;
+    float16_t       accum=0.0f16;
+    float16_t       tmp;
+
+
+    arm_max_no_idx_f16((float16_t *) in, blockSize, &maxVal);
+
+
+    blkCnt = blockSize;
+    pIn = in;
+
+
+    f16x8_t         vSum = vdupq_n_f16(0.0f16);
+    blkCnt = blockSize >> 3;
+    while(blkCnt > 0)
+    {
+        f16x8_t         vecIn = vld1q(pIn);
+        f16x8_t         vecExp;
+
+        vecExp = vexpq_f16(vsubq_n_f16(vecIn, maxVal));
+
+        vSum = vaddq_f16(vSum, vecExp);
+
+        /*
+         * Decrement the blockSize loop counter
+         * Advance vector source and destination pointers
+         */
+        pIn += 8;
+        blkCnt --;
+    }
+
+    /* sum + log */
+    accum = vecAddAcrossF16Mve(vSum);
+
+    blkCnt = blockSize & 0x7;
+    while(blkCnt > 0)
+    {
+       tmp = *pIn++;
+       accum += expf(tmp - maxVal);
+       blkCnt--;
+    
+    }
+
+    accum = maxVal + logf(accum);
+
+    return (accum);
+}
+
+#else
+float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
+{
+    float16_t maxVal;
+    float16_t tmp;
+    const float16_t *pIn;
+    uint32_t blkCnt;
+    float16_t accum;
+ 
+    pIn = in;
+    blkCnt = blockSize;
+
+    maxVal = *pIn++;
+    blkCnt--;
+
+    while(blkCnt > 0)
+    {
+       tmp = *pIn++;
+
+       if (tmp > maxVal)
+       {
+          maxVal = tmp;
+       }
+       blkCnt--;
+    
+    }
+
+    blkCnt = blockSize;
+    pIn = in;
+    accum = 0;
+    while(blkCnt > 0)
+    {
+       tmp = *pIn++;
+       accum += expf(tmp - maxVal);
+       blkCnt--;
+    
+    }
+    accum = maxVal + logf(accum);
+
+    return(accum);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+ * @} end of LogSumExp group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_logsumexp_f32.c
+++ b/Source/StatisticsFunctions/arm_logsumexp_f32.c
@ -30,7 +30,7 @@


 /**
- * @addtogroup groupStats
+ * @addtogroup LogSumExp
 * @{
 */

@ -271,5 +271,5 @@ float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
- * @} end of groupStats group
+ * @} end of LogSumExp group
 */
--- a/Source/StatisticsFunctions/arm_max_no_idx_f16.c
+++ b/Source/StatisticsFunctions/arm_max_no_idx_f16.c
@ -0,0 +1,144 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_max_no_idx_f16.c
+ * Description:  Maximum value of a floating-point vector without returning the index
+ *
+ * $Date:        16. October 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup Max
+  @{
+ */
+
+/**
+  @brief         Maximum value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+void arm_max_no_idx_f16(
+    const float16_t *pSrc,
+    uint32_t   blockSize,
+    float16_t *pResult)
+{
+   f16x8_t     vecSrc;
+   f16x8_t     curExtremValVec = vdupq_n_f16(F16_MIN);
+   float16_t   maxValue = F16_MIN;
+   float16_t   newVal;
+   uint32_t    blkCnt;
+
+   /* Loop unrolling: Compute 4 outputs at a time */
+   blkCnt = blockSize >> 3U;
+
+   while (blkCnt > 0U)
+   {
+
+        vecSrc = vldrhq_f16(pSrc);
+        /*
+         * update per-lane max.
+         */
+        curExtremValVec = vmaxnmq(vecSrc, curExtremValVec);
+        /*
+         * Decrement the blockSize loop counter
+         * Advance vector source and destination pointers
+         */
+        pSrc += 8;
+        blkCnt --;
+    }
+    /*
+     * Get max value across the vector
+     */
+    maxValue = vmaxnmvq(maxValue, curExtremValVec);
+
+    blkCnt = blockSize & 7;
+
+    while (blkCnt > 0U)
+    {
+        newVal = *pSrc++;
+
+        /* compare for the maximum value */
+        if (maxValue < newVal)
+        {
+            /* Update the maximum value and it's index */
+            maxValue = newVal;
+        }
+
+        blkCnt --;
+    }
+
+    *pResult = maxValue;
+}
+
+#else
+
+void arm_max_no_idx_f16(
+    const float16_t *pSrc,
+    uint32_t   blockSize,
+    float16_t *pResult)
+{
+   float16_t   maxValue = F16_MIN;
+   float16_t   newVal;
+
+   while (blockSize > 0U)
+   {
+       newVal = *pSrc++;
+   
+       /* compare for the maximum value */
+       if (maxValue < newVal)
+       {
+           /* Update the maximum value and it's index */
+           maxValue = newVal;
+       }
+   
+       blockSize --;
+   }
+    
+   *pResult = maxValue;
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of Max group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_var_f16.c
+++ b/Source/StatisticsFunctions/arm_var_f16.c
@ -74,14 +74,12 @@ void arm_var_f16(
    arm_mean_f16(pSrc, blockSize, &fMean);

 /* 6.14 bug */
-#if defined(SDCOMP_xxx)
-#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100)
+#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
    __asm volatile(
        "   vmov.i32                     %[acc], #0 \n"
        : [acc] "+t"(sumVec)
        : 
        : );
-#endif
 #endif

    blkCnt = blockSize;
--- a/Source/SupportFunctions/arm_barycenter_f16.c
+++ b/Source/SupportFunctions/arm_barycenter_f16.c
@ -31,11 +31,21 @@
 #include <limits.h>
 #include <math.h>

-
 /**
  @ingroup groupSupport
 */

+/**
+  @defgroup barycenter Barycenter
+
+  Barycenter of weighted vectors
+ */
+
+/**
+  @addtogroup barycenter
+  @{
+ */
+

 /**
 * @brief Barycenter
@ -255,7 +265,7 @@ void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
- * @} end of groupSupport group
+ * @} end of barycenter group
 */

 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
--- a/Source/SupportFunctions/arm_barycenter_f32.c
+++ b/Source/SupportFunctions/arm_barycenter_f32.c
@ -30,7 +30,7 @@


 /**
-  @ingroup groupSupport
+  @ingroup barycenter
 */


@ -408,5 +408,5 @@ void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
- * @} end of groupSupport group
+ * @} end of barycenter group
 */
--- a/Source/SupportFunctions/arm_weighted_sum_f16.c
+++ b/Source/SupportFunctions/arm_weighted_sum_f16.c
@ -32,9 +32,19 @@

 #if defined(ARM_FLOAT16_SUPPORTED)

+/**
+  @ingroup groupSupport
+ */
+
+/**
+  @defgroup weightedsum Weighted Sum
+
+  Weighted sum of values
+ */
+

 /**
- * @addtogroup groupSupport
+ * @addtogroup weightedsum
 * @{
 */

@ -128,7 +138,7 @@ float16_t arm_weighted_sum_f16(const float16_t *in, const float16_t *weigths, ui
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
- * @} end of groupSupport group
+ * @} end of weightedsum group
 */

 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
--- a/Source/SupportFunctions/arm_weighted_sum_f32.c
+++ b/Source/SupportFunctions/arm_weighted_sum_f32.c
@ -31,7 +31,7 @@
 #include "dsp/support_functions.h"

 /**
- * @addtogroup groupSupport
+ * @addtogroup weightedsum
 * @{
 */

@ -182,5 +182,5 @@ float32_t arm_weighted_sum_f32(const float32_t *in, const float32_t *weigths, ui
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
- * @} end of groupSupport group
+ * @} end of weightedsum group
 */
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@ -339,6 +339,7 @@ set(TESTSRC16
  Source/Tests/TransformRF16.cpp
  Source/Tests/SupportTestsF16.cpp
  Source/Tests/SupportBarTestsF16.cpp
+  Source/Tests/FastMathF16.cpp
  )
 endif()
 endif() 
--- a/Testing/Include/Tests/FastMathF16.h
+++ b/Testing/Include/Tests/FastMathF16.h
@ -0,0 +1,23 @@
+#include "Test.h"
+#include "Pattern.h"
+
+#include "dsp/fast_math_functions_f16.h"
+
+class FastMathF16:public Client::Suite
+    {
+        public:
+            FastMathF16(Testing::testID_t id);
+            virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr);
+            virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
+        private:
+            #include "FastMathF16_decl.h"
+            
+            Client::Pattern<float16_t> input;
+
+            Client::LocalPattern<float16_t> output;
+
+            // Reference patterns are not loaded when we are in dump mode
+            Client::RefPattern<float16_t> ref;
+
+           
+    };
--- a/Testing/PatternGeneration/FastMath.py
+++ b/Testing/PatternGeneration/FastMath.py
@ -28,7 +28,7 @@ def writeTests(config,format):
    vals[0] = -0.4
    sqrtvals[0] = 0.0
    
-    if format != 0:
+    if format != 0 and format != 16:
        angles=np.concatenate((a1,a2,a1))
        angles = angles / (2*math.pi)
    config.writeInput(1, angles,"Angles")
@ -43,7 +43,7 @@ def writeTests(config,format):
    config.writeInput(1, samples,"Samples")


-def writeTestsF32(config,format):
+def writeTestsFloat(config,format):
    writeTests(config,format)

    data1 = np.random.randn(20)
@ -61,22 +61,30 @@ def writeTestsF32(config,format):
    v = np.exp(samples)
    config.writeReference(1, v,"Exp")

-    # For benchmarks
+    # For benchmarks and other tests
    samples=np.random.randn(NBSAMPLES)
    samples = np.abs(Tools.normalize(samples))
    config.writeInput(1, samples,"Samples")

+    v = 1.0 / samples
+    config.writeReference(1, v,"Inverse")
+
+
+
+
    
 def generatePatterns():
    PATTERNDIR = os.path.join("Patterns","DSP","FastMath","FastMath")
    PARAMDIR = os.path.join("Parameters","DSP","FastMath","FastMath")
    
    configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32")
+    configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16")
    configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31")
    configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15")
    
    
-    writeTestsF32(configf32,0)
+    writeTestsFloat(configf32,0)
+    writeTestsFloat(configf16,16)
    writeTests(configq31,31)
    writeTests(configq15,15)

--- a/Testing/PatternGeneration/Stats.py
+++ b/Testing/PatternGeneration/Stats.py
@ -116,6 +116,13 @@ def logSumExpDotTest(config,nb):
    config.writeInputS16(nb, dims,"Dims")
    config.writeReference(nb, outputs,"RefLogSumExpDot")

+def writeF16OnlyTests(config,nb):
+    entropyTest(config,nb)
+    logsumexpTest(config,nb+1)
+    klTest(config,nb+2)
+    logSumExpDotTest(config,nb+3)
+    return(nb+4)
+
 def writeF32OnlyTests(config,nb):
    entropyTest(config,nb)
    logsumexpTest(config,nb+1)
@ -357,6 +364,7 @@ def generatePatterns():
    writeTests(configq7,1,7)

    nb=writeTests(configf16,1,16)
+    nb=writeF16OnlyTests(configf16,22)

 if __name__ == '__main__':
  generatePatterns()
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/Angles1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Angles1_f16.txt
@ -0,0 +1,48 @@
+H
+23
+// 0.000000
+0x0
+// 0.785398
+0x3a48
+// 1.570796
+0x3e48
+// 2.356194
+0x40b6
+// 3.141593
+0x4248
+// 3.926991
+0x43db
+// 4.712389
+0x44b6
+// 6.283184
+0x4648
+// -0.785398
+0xba48
+// -1.570796
+0xbe48
+// -2.356194
+0xc0b6
+// -3.141593
+0xc248
+// -3.926991
+0xc3db
+// -4.712389
+0xc4b6
+// -6.283186
+0xc648
+// 6.283185
+0x4648
+// 7.068583
+0x4712
+// 7.853982
+0x47db
+// 8.639380
+0x4852
+// 9.424778
+0x48b6
+// 10.210176
+0x491b
+// 10.995574
+0x497f
+// 12.566370
+0x4a48
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/Cos1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Cos1_f16.txt
@ -0,0 +1,48 @@
+H
+23
+// 1.000000
+0x3c00
+// 0.707107
+0x39a8
+// 0.000000
+0x0
+// -0.707107
+0xb9a8
+// -1.000000
+0xbc00
+// -0.707107
+0xb9a8
+// -0.000000
+0x8000
+// 1.000000
+0x3c00
+// 0.707107
+0x39a8
+// 0.000000
+0x0
+// -0.707107
+0xb9a8
+// -1.000000
+0xbc00
+// -0.707107
+0xb9a8
+// -0.000000
+0x8000
+// 1.000000
+0x3c00
+// 1.000000
+0x3c00
+// 0.707107
+0x39a8
+// 0.000000
+0x0
+// -0.707107
+0xb9a8
+// -1.000000
+0xbc00
+// -0.707107
+0xb9a8
+// -0.000000
+0x8000
+// 1.000000
+0x3c00
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/Exp1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Exp1_f16.txt
@ -0,0 +1,106 @@
+H
+52
+// 1.000000
+0x3c00
+// 2.718282
+0x4170
+// 0.670320
+0x395d
+// 0.681354
+0x3973
+// 0.692569
+0x398a
+// 0.703969
+0x39a2
+// 0.715557
+0x39b9
+// 0.727336
+0x39d2
+// 0.739308
+0x39ea
+// 0.751477
+0x3a03
+// 0.763847
+0x3a1c
+// 0.776420
+0x3a36
+// 0.789201
+0x3a50
+// 0.802191
+0x3a6b
+// 0.815396
+0x3a86
+// 0.828818
+0x3aa1
+// 0.842460
+0x3abd
+// 0.856328
+0x3ada
+// 0.870423
+0x3af7
+// 0.884751
+0x3b14
+// 0.899315
+0x3b32
+// 0.914118
+0x3b50
+// 0.929165
+0x3b6f
+// 0.944459
+0x3b8e
+// 0.960005
+0x3bae
+// 0.975808
+0x3bce
+// 0.991870
+0x3bef
+// 1.008197
+0x3c08
+// 1.024792
+0x3c19
+// 1.041661
+0x3c2b
+// 1.058807
+0x3c3c
+// 1.076236
+0x3c4e
+// 1.093951
+0x3c60
+// 1.111958
+0x3c73
+// 1.130261
+0x3c85
+// 1.148866
+0x3c98
+// 1.167777
+0x3cac
+// 1.186999
+0x3cbf
+// 1.206538
+0x3cd3
+// 1.226398
+0x3ce8
+// 1.246585
+0x3cfd
+// 1.267105
+0x3d12
+// 1.287962
+0x3d27
+// 1.309163
+0x3d3d
+// 1.330712
+0x3d53
+// 1.352616
+0x3d69
+// 1.374881
+0x3d80
+// 1.397513
+0x3d97
+// 1.420516
+0x3daf
+// 1.443899
+0x3dc7
+// 1.467666
+0x3ddf
+// 1.491825
+0x3df8
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/ExpInput1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/ExpInput1_f16.txt
@ -0,0 +1,106 @@
+H
+52
+// 0.000000
+0x0
+// 1.000000
+0x3c00
+// -0.400000
+0xb666
+// -0.383673
+0xb624
+// -0.367347
+0xb5e1
+// -0.351020
+0xb59e
+// -0.334694
+0xb55b
+// -0.318367
+0xb518
+// -0.302041
+0xb4d5
+// -0.285714
+0xb492
+// -0.269388
+0xb44f
+// -0.253061
+0xb40d
+// -0.236735
+0xb393
+// -0.220408
+0xb30e
+// -0.204082
+0xb288
+// -0.187755
+0xb202
+// -0.171429
+0xb17c
+// -0.155102
+0xb0f7
+// -0.138776
+0xb071
+// -0.122449
+0xafd6
+// -0.106122
+0xaecb
+// -0.089796
+0xadbf
+// -0.073469
+0xacb4
+// -0.057143
+0xab50
+// -0.040816
+0xa939
+// -0.024490
+0xa645
+// -0.008163
+0xa02e
+// 0.008163
+0x202e
+// 0.024490
+0x2645
+// 0.040816
+0x2939
+// 0.057143
+0x2b50
+// 0.073469
+0x2cb4
+// 0.089796
+0x2dbf
+// 0.106122
+0x2ecb
+// 0.122449
+0x2fd6
+// 0.138776
+0x3071
+// 0.155102
+0x30f7
+// 0.171429
+0x317c
+// 0.187755
+0x3202
+// 0.204082
+0x3288
+// 0.220408
+0x330e
+// 0.236735
+0x3393
+// 0.253061
+0x340d
+// 0.269388
+0x344f
+// 0.285714
+0x3492
+// 0.302041
+0x34d5
+// 0.318367
+0x3518
+// 0.334694
+0x355b
+// 0.351020
+0x359e
+// 0.367347
+0x35e1
+// 0.383673
+0x3624
+// 0.400000
+0x3666
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/Inverse1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Inverse1_f16.txt
@ -0,0 +1,514 @@
+H
+256
+// 13.282788
+0x4aa4
+// 74.795943
+0x54ad
+// 4.534229
+0x4489
+// 3.745299
+0x437e
+// 2.439216
+0x40e1
+// 6.181096
+0x462e
+// 4.004707
+0x4401
+// 11.283654
+0x49a4
+// 1.847747
+0x3f64
+// 5.549382
+0x458d
+// 1.536148
+0x3e25
+// 30.005060
+0x4f80
+// 31.475099
+0x4fde
+// 17.543195
+0x4c63
+// 2.419261
+0x40d7
+// 4.349753
+0x445a
+// 4.841152
+0x44d7
+// 8.101053
+0x480d
+// 7.775466
+0x47c7
+// 10.960573
+0x497b
+// 1.935619
+0x3fbe
+// 14.846755
+0x4b6c
+// 2.538388
+0x4114
+// 2.328174
+0x40a8
+// 1.187660
+0x3cc0
+// 9.459035
+0x48bb
+// 6.531679
+0x4688
+// 28.111782
+0x4f07
+// 2.775787
+0x418d
+// 1.610728
+0x3e71
+// 4.295367
+0x444c
+// 8.503271
+0x4840
+// 6.431921
+0x466f
+// 1.644415
+0x3e94
+// 5.022253
+0x4506
+// 46.675156
+0x51d6
+// 7.923678
+0x47ec
+// 1.986785
+0x3ff2
+// 12.264212
+0x4a22
+// 5.127496
+0x4521
+// 3.526537
+0x430e
+// 3.099069
+0x4233
+// 11.281743
+0x49a4
+// 5.690813
+0x45b1
+// 25.784472
+0x4e72
+// 31.172867
+0x4fcb
+// 5.406237
+0x4568
+// 1.003867
+0x3c04
+// 2.474874
+0x40f3
+// 5.259653
+0x4542
+// 6.833799
+0x46d5
+// 2.146320
+0x404b
+// 4.075154
+0x4413
+// 6.755841
+0x46c1
+// 3.298992
+0x4299
+// 3.211582
+0x426c
+// 154.452815
+0x58d4
+// 2.467569
+0x40ef
+// 13.353500
+0x4aad
+// 2.897118
+0x41cb
+// 11.198146
+0x4999
+// 2.095061
+0x4031
+// 5.949994
+0x45f3
+// 502.264324
+0x5fd9
+// 9.785310
+0x48e5
+// 24.679848
+0x4e2c
+// 9.141598
+0x4892
+// 4.842086
+0x44d8
+// 2.305929
+0x409d
+// 2.553810
+0x411c
+// 6.529844
+0x4688
+// 12.616308
+0x4a4f
+// 10.160835
+0x4915
+// 4.741947
+0x44be
+// 3.062033
+0x4220
+// 15.072163
+0x4b89
+// 19.437242
+0x4cdc
+// 304.465872
+0x5cc2
+// 3.697883
+0x4365
+// 1.200278
+0x3ccd
+// 12.039526
+0x4a05
+// 6.285477
+0x4649
+// 41.940922
+0x513e
+// 14.367260
+0x4b2f
+// 2.844286
+0x41b0
+// 2.434156
+0x40de
+// 18.803330
+0x4cb3
+// 3.590405
+0x432e
+// 45.916673
+0x51bd
+// 16.744938
+0x4c30
+// 9.513594
+0x48c2
+// 266.925697
+0x5c2c
+// 141.996772
+0x5870
+// 1.273840
+0x3d18
+// 4.064921
+0x4411
+// 3.059166
+0x421e
+// 3.063974
+0x4221
+// 12.270595
+0x4a23
+// 17.040917
+0x4c43
+// 1.259633
+0x3d0a
+// 2.821234
+0x41a4
+// 6.853772
+0x46db
+// 7.454620
+0x4774
+// 1.609663
+0x3e70
+// 11.592823
+0x49cc
+// 8.194720
+0x4819
+// 2.951149
+0x41e7
+// 2.312031
+0x40a0
+// 9.662832
+0x48d5
+// 1.513853
+0x3e0e
+// 93.930231
+0x55df
+// 91.754898
+0x55bc
+// 2.936342
+0x41df
+// 36.983413
+0x509f
+// 1.683027
+0x3ebb
+// 3.835847
+0x43ac
+// 4.018890
+0x4405
+// 8.686859
+0x4858
+// 3.832640
+0x43aa
+// 5.651256
+0x45a7
+// 167.057056
+0x5938
+// 10.621388
+0x4950
+// 2.039201
+0x4014
+// 3.169404
+0x4257
+// 121.699150
+0x579b
+// 1.962375
+0x3fd9
+// 3.588653
+0x432d
+// 6.551802
+0x468d
+// 2.273146
+0x408c
+// 6.206085
+0x4635
+// 1.260120
+0x3d0a
+// 1.328843
+0x3d51
+// 7.193014
+0x4731
+// 5.073456
+0x4513
+// 4.099381
+0x4419
+// 14.640782
+0x4b52
+// 8.470057
+0x483c
+// 35.841993
+0x507b
+// 25.902541
+0x4e7a
+// 8.278306
+0x4824
+// 6.579874
+0x4694
+// 3.183432
+0x425e
+// 3.157985
+0x4251
+// 30.987647
+0x4fbf
+// 4.960553
+0x44f6
+// 28.319462
+0x4f14
+// 3.360753
+0x42b9
+// 13.357196
+0x4aae
+// 7.617099
+0x479e
+// 12.543129
+0x4a46
+// 3.206430
+0x426a
+// 24.728589
+0x4e2f
+// 2.883364
+0x41c4
+// 4.739018
+0x44bd
+// 11.599710
+0x49cd
+// 9.538527
+0x48c5
+// 16.661500
+0x4c2a
+// 2.736285
+0x4179
+// 6.631133
+0x46a2
+// 11.402722
+0x49b4
+// 1.858302
+0x3f6f
+// 11.107479
+0x498e
+// 2.144436
+0x404a
+// 3.471115
+0x42f1
+// 7.195419
+0x4732
+// 2.250048
+0x4080
+// 5.355021
+0x455b
+// 2.175851
+0x405a
+// 6.975780
+0x46fa
+// 10.830583
+0x496a
+// 4.039975
+0x440a
+// 19.297319
+0x4cd3
+// 14.568924
+0x4b49
+// 1.760573
+0x3f0b
+// 4.344142
+0x4458
+// 1.911989
+0x3fa6
+// 125.811431
+0x57dd
+// 6.147110
+0x4626
+// 3.323063
+0x42a5
+// 141.465322
+0x586c
+// 2.428645
+0x40db
+// 5.057885
+0x450f
+// 17.465511
+0x4c5e
+// 5.487886
+0x457d
+// 3.693320
+0x4363
+// 5.722713
+0x45b9
+// 2.815819
+0x41a2
+// 17.741187
+0x4c6f
+// 156.719386
+0x58e6
+// 9.622230
+0x48d0
+// 3.582729
+0x432a
+// 7.122762
+0x471f
+// 3.801118
+0x439a
+// 21.059681
+0x4d44
+// 7.002552
+0x4701
+// 1.681254
+0x3eba
+// 26.237564
+0x4e8f
+// 2.196333
+0x4065
+// 3.057311
+0x421d
+// 43.974325
+0x517f
+// 4.871455
+0x44df
+// 58.682025
+0x5356
+// 1.935719
+0x3fbe
+// 50.178529
+0x5246
+// 14.081570
+0x4b0a
+// 2.276558
+0x408e
+// 15.295183
+0x4ba6
+// 3.718388
+0x4370
+// 3.839651
+0x43ae
+// 3.527233
+0x430e
+// 2.428714
+0x40dc
+// 11.062054
+0x4988
+// 5.778279
+0x45c7
+// 2.901877
+0x41ce
+// 4.614082
+0x449d
+// 2.846533
+0x41b1
+// 8.471893
+0x483c
+// 5.528337
+0x4587
+// 4.069745
+0x4412
+// 4.311615
+0x4450
+// 2.410911
+0x40d2
+// 4.794024
+0x44cb
+// 1.952501
+0x3fcf
+// 2.350841
+0x40b4
+// 2.240902
+0x407b
+// 5.954644
+0x45f4
+// 6.121318
+0x461f
+// 6.649823
+0x46a6
+// 16.968851
+0x4c3e
+// 1.000000
+0x3c00
+// 5.024657
+0x4506
+// 5.569085
+0x4592
+// 8.212669
+0x481b
+// 2.487941
+0x40fa
+// 1.966557
+0x3fde
+// 3.370575
+0x42be
+// 29.614104
+0x4f67
+// 3.196824
+0x4265
+// 1.470828
+0x3de2
+// 8.924856
+0x4876
+// 3.005112
+0x4203
+// 3.024847
+0x420d
+// 4.340886
+0x4457
+// 1.894158
+0x3f94
+// 3.562064
+0x4320
+// 3.233189
+0x4277
+// 1.628957
+0x3e84
+// 2.791436
+0x4195
+// 8.235823
+0x481e
+// 8.808764
+0x4868
+// 36.005208
+0x5080
+// 12.766707
+0x4a62
+// 5.964918
+0x45f7
+// 1.671910
+0x3eb0
+// 2.699011
+0x4166
+// 16.411460
+0x4c1a
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/Log1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Log1_f16.txt
@ -0,0 +1,52 @@
+H
+25
+// -2.302585
+0xc09b
+// -1.203973
+0xbcd1
+// -0.693147
+0xb98c
+// 0.000000
+0x0
+// 0.693147
+0x398c
+// -2.516839
+0xc109
+// 0.000000
+0x0
+// -5.908962
+0xc5e9
+// -1.345933
+0xbd62
+// -0.923815
+0xbb64
+// -4.046497
+0xc40c
+// -2.530234
+0xc10f
+// -0.724334
+0xb9cb
+// -1.436949
+0xbdbf
+// -1.327187
+0xbd4f
+// -1.741553
+0xbef7
+// -0.066722
+0xac45
+// -0.616041
+0xb8ee
+// -0.822195
+0xba94
+// -1.579204
+0xbe51
+// -1.333689
+0xbd56
+// -0.860545
+0xbae2
+// -1.080309
+0xbc52
+// -1.977120
+0xbfe9
+// -1.877663
+0xbf83
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/LogInput1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/LogInput1_f16.txt
@ -0,0 +1,52 @@
+H
+25
+// 0.100000
+0x2e66
+// 0.300000
+0x34cd
+// 0.500000
+0x3800
+// 1.000000
+0x3c00
+// 2.000000
+0x4000
+// 0.080714
+0x2d2a
+// 1.000000
+0x3c00
+// 0.002715
+0x198f
+// 0.260297
+0x342a
+// 0.397001
+0x365a
+// 0.017484
+0x247a
+// 0.079640
+0x2d19
+// 0.484647
+0x37c1
+// 0.237652
+0x339b
+// 0.265222
+0x343e
+// 0.175248
+0x319c
+// 0.935456
+0x3b7c
+// 0.540078
+0x3852
+// 0.439466
+0x3708
+// 0.206139
+0x3299
+// 0.263503
+0x3437
+// 0.422932
+0x36c4
+// 0.339491
+0x356f
+// 0.138467
+0x306e
+// 0.152947
+0x30e5
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/Samples1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Samples1_f16.txt
@ -0,0 +1,514 @@
+H
+256
+// 0.075285
+0x2cd1
+// 0.013370
+0x22d8
+// 0.220545
+0x330f
+// 0.267001
+0x3446
+// 0.409968
+0x368f
+// 0.161784
+0x312d
+// 0.249706
+0x33fe
+// 0.088624
+0x2dac
+// 0.541200
+0x3854
+// 0.180200
+0x31c4
+// 0.650979
+0x3935
+// 0.033328
+0x2844
+// 0.031771
+0x2811
+// 0.057002
+0x2b4c
+// 0.413349
+0x369d
+// 0.229898
+0x335b
+// 0.206562
+0x329c
+// 0.123441
+0x2fe6
+// 0.128610
+0x301e
+// 0.091236
+0x2dd7
+// 0.516631
+0x3822
+// 0.067355
+0x2c50
+// 0.393951
+0x364e
+// 0.429521
+0x36df
+// 0.841992
+0x3abc
+// 0.105719
+0x2ec4
+// 0.153100
+0x30e6
+// 0.035572
+0x288e
+// 0.360258
+0x35c4
+// 0.620837
+0x38f7
+// 0.232809
+0x3373
+// 0.117602
+0x2f87
+// 0.155475
+0x30fa
+// 0.608119
+0x38dd
+// 0.199114
+0x325f
+// 0.021425
+0x257c
+// 0.126204
+0x300a
+// 0.503326
+0x3807
+// 0.081538
+0x2d38
+// 0.195027
+0x323e
+// 0.283564
+0x3489
+// 0.322678
+0x352a
+// 0.088639
+0x2dac
+// 0.175722
+0x31a0
+// 0.038783
+0x28f7
+// 0.032079
+0x281b
+// 0.184972
+0x31eb
+// 0.996148
+0x3bf8
+// 0.404061
+0x3677
+// 0.190127
+0x3216
+// 0.146331
+0x30af
+// 0.465914
+0x3774
+// 0.245390
+0x33da
+// 0.148020
+0x30bd
+// 0.303123
+0x34da
+// 0.311373
+0x34fb
+// 0.006474
+0x1ea1
+// 0.405257
+0x367c
+// 0.074887
+0x2ccb
+// 0.345171
+0x3586
+// 0.089300
+0x2db7
+// 0.477313
+0x37a3
+// 0.168067
+0x3161
+// 0.001991
+0x1814
+// 0.102194
+0x2e8a
+// 0.040519
+0x2930
+// 0.109390
+0x2f00
+// 0.206523
+0x329c
+// 0.433665
+0x36f0
+// 0.391572
+0x3644
+// 0.153143
+0x30e7
+// 0.079262
+0x2d13
+// 0.098417
+0x2e4c
+// 0.210884
+0x32c0
+// 0.326580
+0x353a
+// 0.066347
+0x2c3f
+// 0.051448
+0x2a96
+// 0.003284
+0x1aba
+// 0.270425
+0x3454
+// 0.833140
+0x3aaa
+// 0.083060
+0x2d51
+// 0.159097
+0x3117
+// 0.023843
+0x261b
+// 0.069603
+0x2c74
+// 0.351582
+0x35a0
+// 0.410820
+0x3693
+// 0.053182
+0x2acf
+// 0.278520
+0x3475
+// 0.021779
+0x2593
+// 0.059720
+0x2ba5
+// 0.105113
+0x2eba
+// 0.003746
+0x1bac
+// 0.007042
+0x1f36
+// 0.785028
+0x3a48
+// 0.246007
+0x33df
+// 0.326886
+0x353b
+// 0.326374
+0x3539
+// 0.081496
+0x2d37
+// 0.058682
+0x2b83
+// 0.793882
+0x3a5a
+// 0.354455
+0x35ac
+// 0.145905
+0x30ab
+// 0.134145
+0x304b
+// 0.621248
+0x38f8
+// 0.086260
+0x2d85
+// 0.122030
+0x2fcf
+// 0.338851
+0x356c
+// 0.432520
+0x36ec
+// 0.103489
+0x2ea0
+// 0.660566
+0x3949
+// 0.010646
+0x2173
+// 0.010899
+0x2195
+// 0.340560
+0x3573
+// 0.027039
+0x26ec
+// 0.594168
+0x38c1
+// 0.260699
+0x342c
+// 0.248825
+0x33f6
+// 0.115116
+0x2f5e
+// 0.260917
+0x342d
+// 0.176952
+0x31aa
+// 0.005986
+0x1e21
+// 0.094150
+0x2e07
+// 0.490388
+0x37d9
+// 0.315517
+0x350c
+// 0.008217
+0x2035
+// 0.509586
+0x3814
+// 0.278656
+0x3475
+// 0.152630
+0x30e2
+// 0.439919
+0x370a
+// 0.161132
+0x3128
+// 0.793575
+0x3a59
+// 0.752534
+0x3a05
+// 0.139024
+0x3073
+// 0.197104
+0x324f
+// 0.243939
+0x33ce
+// 0.068302
+0x2c5f
+// 0.118063
+0x2f8e
+// 0.027900
+0x2724
+// 0.038606
+0x28f1
+// 0.120798
+0x2fbb
+// 0.151979
+0x30dd
+// 0.314126
+0x3507
+// 0.316658
+0x3511
+// 0.032271
+0x2821
+// 0.201590
+0x3273
+// 0.035311
+0x2885
+// 0.297552
+0x34c3
+// 0.074866
+0x2ccb
+// 0.131284
+0x3033
+// 0.079725
+0x2d1a
+// 0.311873
+0x34fd
+// 0.040439
+0x292d
+// 0.346817
+0x358d
+// 0.211014
+0x32c1
+// 0.086209
+0x2d84
+// 0.104838
+0x2eb6
+// 0.060019
+0x2baf
+// 0.365459
+0x35d9
+// 0.150804
+0x30d3
+// 0.087698
+0x2d9d
+// 0.538126
+0x384e
+// 0.090029
+0x2dc3
+// 0.466323
+0x3776
+// 0.288092
+0x349c
+// 0.138977
+0x3073
+// 0.444435
+0x371c
+// 0.186741
+0x31fa
+// 0.459590
+0x375a
+// 0.143353
+0x3096
+// 0.092331
+0x2de9
+// 0.247526
+0x33ec
+// 0.051821
+0x2aa2
+// 0.068639
+0x2c65
+// 0.567997
+0x388b
+// 0.230195
+0x335e
+// 0.523016
+0x382f
+// 0.007948
+0x2012
+// 0.162678
+0x3135
+// 0.300927
+0x34d1
+// 0.007069
+0x1f3d
+// 0.411752
+0x3697
+// 0.197711
+0x3254
+// 0.057256
+0x2b54
+// 0.182220
+0x31d5
+// 0.270759
+0x3455
+// 0.174742
+0x3197
+// 0.355136
+0x35af
+// 0.056366
+0x2b37
+// 0.006381
+0x1e89
+// 0.103926
+0x2ea7
+// 0.279117
+0x3477
+// 0.140395
+0x307e
+// 0.263080
+0x3436
+// 0.047484
+0x2a14
+// 0.142805
+0x3092
+// 0.594794
+0x38c2
+// 0.038113
+0x28e1
+// 0.455304
+0x3749
+// 0.327085
+0x353c
+// 0.022741
+0x25d2
+// 0.205277
+0x3292
+// 0.017041
+0x245d
+// 0.516604
+0x3822
+// 0.019929
+0x251a
+// 0.071015
+0x2c8c
+// 0.439260
+0x3707
+// 0.065380
+0x2c2f
+// 0.268934
+0x344e
+// 0.260440
+0x342b
+// 0.283508
+0x3489
+// 0.411741
+0x3696
+// 0.090399
+0x2dc9
+// 0.173062
+0x318a
+// 0.344605
+0x3584
+// 0.216728
+0x32ef
+// 0.351305
+0x359f
+// 0.118037
+0x2f8e
+// 0.180886
+0x31ca
+// 0.245716
+0x33dd
+// 0.231932
+0x336c
+// 0.414781
+0x36a3
+// 0.208593
+0x32ad
+// 0.512164
+0x3819
+// 0.425380
+0x36ce
+// 0.446249
+0x3724
+// 0.167936
+0x3160
+// 0.163364
+0x313a
+// 0.150380
+0x30d0
+// 0.058932
+0x2b8b
+// 1.000000
+0x3c00
+// 0.199019
+0x325e
+// 0.179563
+0x31bf
+// 0.121763
+0x2fcb
+// 0.401939
+0x366e
+// 0.508503
+0x3811
+// 0.296685
+0x34bf
+// 0.033768
+0x2852
+// 0.312811
+0x3501
+// 0.679889
+0x3970
+// 0.112047
+0x2f2c
+// 0.332766
+0x3553
+// 0.330595
+0x354a
+// 0.230368
+0x335f
+// 0.527939
+0x3839
+// 0.280736
+0x347e
+// 0.309292
+0x34f3
+// 0.613890
+0x38e9
+// 0.358239
+0x35bb
+// 0.121421
+0x2fc5
+// 0.113523
+0x2f44
+// 0.027774
+0x271c
+// 0.078329
+0x2d03
+// 0.167647
+0x315d
+// 0.598118
+0x38c9
+// 0.370506
+0x35ee
+// 0.060933
+0x2bcd
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/Sin1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Sin1_f16.txt
@ -0,0 +1,48 @@
+H
+23
+// 0.000000
+0x0
+// 0.707107
+0x39a8
+// 1.000000
+0x3c00
+// 0.707107
+0x39a8
+// 0.000000
+0x0
+// -0.707107
+0xb9a8
+// -1.000000
+0xbc00
+// -0.000001
+0x8011
+// -0.707107
+0xb9a8
+// -1.000000
+0xbc00
+// -0.707107
+0xb9a8
+// -0.000000
+0x8000
+// 0.707107
+0x39a8
+// 1.000000
+0x3c00
+// -0.000001
+0x8011
+// -0.000000
+0x8000
+// 0.707107
+0x39a8
+// 1.000000
+0x3c00
+// 0.707107
+0x39a8
+// 0.000000
+0x0
+// -0.707107
+0xb9a8
+// -1.000000
+0xbc00
+// -0.000001
+0x8011
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/Sqrt1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Sqrt1_f16.txt
@ -0,0 +1,18 @@
+H
+8
+// 0.000000
+0x0
+// 0.000000
+0x0
+// 0.316228
+0x350f
+// 1.000000
+0x3c00
+// 1.414214
+0x3da8
+// 1.732051
+0x3eee
+// 1.870829
+0x3f7c
+// 1.897367
+0x3f97
--- a/Testing/Patterns/DSP/FastMath/FastMathF16/SqrtInput1_f16.txt
+++ b/Testing/Patterns/DSP/FastMath/FastMathF16/SqrtInput1_f16.txt
@ -0,0 +1,18 @@
+H
+8
+// -0.400000
+0xb666
+// 0.000000
+0x0
+// 0.100000
+0x2e66
+// 1.000000
+0x3c00
+// 2.000000
+0x4000
+// 3.000000
+0x4200
+// 3.500000
+0x4300
+// 3.600000
+0x4333
--- a/Testing/Patterns/DSP/Stats/StatsF16/Dims22_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/Dims22_s16.txt
@ -0,0 +1,24 @@
+H
+11
+// 10
+0x000A
+// 3
+0x0003
+// 8
+0x0008
+// 9
+0x0009
+// 12
+0x000C
+// 3
+0x0003
+// 8
+0x0008
+// 9
+0x0009
+// 12
+0x000C
+// 3
+0x0003
+// 8
+0x0008
--- a/Testing/Patterns/DSP/Stats/StatsF16/Dims23_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/Dims23_s16.txt
@ -0,0 +1,24 @@
+H
+11
+// 10
+0x000A
+// 3
+0x0003
+// 8
+0x0008
+// 9
+0x0009
+// 12
+0x000C
+// 3
+0x0003
+// 8
+0x0008
+// 9
+0x0009
+// 12
+0x000C
+// 3
+0x0003
+// 8
+0x0008
--- a/Testing/Patterns/DSP/Stats/StatsF16/Dims24_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/Dims24_s16.txt
@ -0,0 +1,24 @@
+H
+11
+// 10
+0x000A
+// 3
+0x0003
+// 8
+0x0008
+// 9
+0x0009
+// 12
+0x000C
+// 3
+0x0003
+// 8
+0x0008
+// 9
+0x0009
+// 12
+0x000C
+// 3
+0x0003
+// 8
+0x0008
--- a/Testing/Patterns/DSP/Stats/StatsF16/Dims25_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/Dims25_s16.txt
@ -0,0 +1,24 @@
+H
+11
+// 10
+0x000A
+// 3
+0x0003
+// 8
+0x0008
+// 9
+0x0009
+// 12
+0x000C
+// 3
+0x0003
+// 8
+0x0008
+// 9
+0x0009
+// 12
+0x000C
+// 3
+0x0003
+// 8
+0x0008
--- a/Testing/Patterns/DSP/Stats/StatsF16/Input22_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/Input22_f16.txt
@ -0,0 +1,152 @@
+H
+75
+// 0.322836
+0x352a
+// 0.198525
+0x325a
+// 0.478639
+0x37a9
+// 0.026143
+0x26b1
+// 0.098991
+0x2e56
+// 0.143166
+0x3095
+// 0.085541
+0x2d7a
+// 0.068541
+0x2c63
+// 0.211153
+0x32c2
+// 0.181133
+0x31cc
+// 0.185330
+0x31ee
+// 0.089042
+0x2db3
+// 0.147745
+0x30ba
+// 0.095482
+0x2e1c
+// 0.040729
+0x2937
+// 0.148711
+0x30c2
+// 0.132218
+0x303b
+// 0.141558
+0x3088
+// 0.075285
+0x2cd1
+// 0.129230
+0x3023
+// 0.111640
+0x2f25
+// 0.120751
+0x2fba
+// 0.013239
+0x22c7
+// 0.060065
+0x2bb0
+// 0.127400
+0x3014
+// 0.094640
+0x2e0f
+// 0.106302
+0x2ece
+// 0.047992
+0x2a25
+// 0.051468
+0x2a97
+// 0.125120
+0x3001
+// 0.120732
+0x2fba
+// 0.020651
+0x2549
+// 0.144559
+0x30a0
+// 0.511255
+0x3817
+// 0.344187
+0x3582
+// 0.115065
+0x2f5d
+// 0.068302
+0x2c5f
+// 0.205347
+0x3292
+// 0.160452
+0x3122
+// 0.065289
+0x2c2e
+// 0.127455
+0x3014
+// 0.248741
+0x33f6
+// 0.009349
+0x20c9
+// 0.100586
+0x2e70
+// 0.102405
+0x2e8e
+// 0.188184
+0x3206
+// 0.192786
+0x322b
+// 0.160000
+0x311f
+// 0.027773
+0x271c
+// 0.112430
+0x2f32
+// 0.001022
+0x142f
+// 0.114814
+0x2f59
+// 0.056143
+0x2b30
+// 0.122610
+0x2fd9
+// 0.165753
+0x314e
+// 0.135443
+0x3056
+// 0.019103
+0x24e4
+// 0.028083
+0x2730
+// 0.013344
+0x22d5
+// 0.165378
+0x314b
+// 0.075133
+0x2ccf
+// 0.056506
+0x2b3c
+// 0.005057
+0x1d2e
+// 0.157446
+0x310a
+// 0.426391
+0x36d2
+// 0.165223
+0x314a
+// 0.408385
+0x3689
+// 0.017004
+0x245a
+// 0.296335
+0x34be
+// 0.005781
+0x1deb
+// 0.076763
+0x2cea
+// 0.223549
+0x3327
+// 0.055634
+0x2b1f
+// 0.222075
+0x331b
+// 0.102857
+0x2e95
--- a/Testing/Patterns/DSP/Stats/StatsF16/Input23_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/Input23_f16.txt
@ -0,0 +1,152 @@
+H
+75
+// 0.475515
+0x379c
+// 0.270052
+0x3452
+// 0.254433
+0x3412
+// 0.055055
+0x2b0c
+// 0.202411
+0x327a
+// 0.124266
+0x2ff4
+// 0.046322
+0x29ee
+// 0.178920
+0x31ba
+// 0.078939
+0x2d0d
+// 0.111714
+0x2f26
+// 0.202373
+0x327a
+// 0.187127
+0x31fd
+// 0.154401
+0x30f1
+// 0.054258
+0x2af2
+// 0.123123
+0x2fe1
+// 0.099707
+0x2e62
+// 0.011955
+0x221f
+// 0.166787
+0x3156
+// 0.088390
+0x2da8
+// 0.114251
+0x2f50
+// 0.070847
+0x2c89
+// 0.118218
+0x2f91
+// 0.057549
+0x2b5e
+// 0.143263
+0x3096
+// 0.031234
+0x27ff
+// 0.169478
+0x316c
+// 0.045658
+0x29d8
+// 0.170969
+0x3179
+// 0.011664
+0x21f9
+// 0.043150
+0x2986
+// 0.056999
+0x2b4c
+// 0.080973
+0x2d2f
+// 0.301702
+0x34d4
+// 0.375561
+0x3602
+// 0.322737
+0x352a
+// 0.069801
+0x2c78
+// 0.234393
+0x3380
+// 0.188541
+0x3209
+// 0.233770
+0x337b
+// 0.089282
+0x2db7
+// 0.019572
+0x2503
+// 0.075178
+0x2cd0
+// 0.089462
+0x2dba
+// 0.116452
+0x2f74
+// 0.111320
+0x2f20
+// 0.215334
+0x32e4
+// 0.119770
+0x2faa
+// 0.058668
+0x2b82
+// 0.204849
+0x328e
+// 0.009485
+0x20db
+// 0.158639
+0x3114
+// 0.005484
+0x1d9e
+// 0.137894
+0x306a
+// 0.154182
+0x30ef
+// 0.008037
+0x201d
+// 0.040326
+0x2929
+// 0.038281
+0x28e6
+// 0.082787
+0x2d4c
+// 0.046186
+0x29e9
+// 0.118893
+0x2f9c
+// 0.034195
+0x2860
+// 0.090605
+0x2dcc
+// 0.156341
+0x3101
+// 0.092273
+0x2de8
+// 0.578286
+0x38a0
+// 0.049254
+0x2a4e
+// 0.372461
+0x35f6
+// 0.021948
+0x259e
+// 0.059545
+0x2b9f
+// 0.138834
+0x3071
+// 0.220685
+0x3310
+// 0.062460
+0x2bff
+// 0.201480
+0x3273
+// 0.087765
+0x2d9e
+// 0.207282
+0x32a2
--- a/Testing/Patterns/DSP/Stats/StatsF16/InputA24_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/InputA24_f16.txt
@ -0,0 +1,152 @@
+H
+75
+// 0.066000
+0x2c39
+// 0.029316
+0x2781
+// 0.904684
+0x3b3d
+// 0.055167
+0x2b10
+// 0.210490
+0x32bc
+// 0.148163
+0x30be
+// 0.189833
+0x3213
+// 0.038896
+0x28fb
+// 0.138513
+0x306f
+// 0.022802
+0x25d6
+// 0.196137
+0x3247
+// 0.076794
+0x2cea
+// 0.010108
+0x212d
+// 0.220958
+0x3312
+// 0.129726
+0x3027
+// 0.099061
+0x2e57
+// 0.136545
+0x305f
+// 0.232233
+0x336e
+// 0.014392
+0x235e
+// 0.080182
+0x2d22
+// 0.004332
+0x1c70
+// 0.083574
+0x2d59
+// 0.080585
+0x2d28
+// 0.117765
+0x2f89
+// 0.162459
+0x3133
+// 0.019641
+0x2507
+// 0.075399
+0x2cd3
+// 0.144758
+0x30a2
+// 0.064852
+0x2c27
+// 0.130529
+0x302d
+// 0.026411
+0x26c3
+// 0.089694
+0x2dbe
+// 0.345746
+0x3588
+// 0.286879
+0x3497
+// 0.367375
+0x35e1
+// 0.099391
+0x2e5c
+// 0.081898
+0x2d3e
+// 0.075065
+0x2cce
+// 0.179149
+0x31bc
+// 0.135742
+0x3058
+// 0.071606
+0x2c95
+// 0.174743
+0x3197
+// 0.182405
+0x31d6
+// 0.067165
+0x2c4c
+// 0.071465
+0x2c93
+// 0.128327
+0x301b
+// 0.219746
+0x3308
+// 0.104638
+0x2eb2
+// 0.145188
+0x30a5
+// 0.010493
+0x215f
+// 0.166402
+0x3153
+// 0.086577
+0x2d8a
+// 0.134145
+0x304b
+// 0.007296
+0x1f79
+// 0.070782
+0x2c88
+// 0.104541
+0x2eb1
+// 0.119510
+0x2fa6
+// 0.109169
+0x2efd
+// 0.070861
+0x2c89
+// 0.052457
+0x2ab7
+// 0.013363
+0x22d7
+// 0.067024
+0x2c4a
+// 0.142142
+0x308c
+// 0.108710
+0x2ef5
+// 0.182145
+0x31d4
+// 0.359778
+0x35c2
+// 0.458077
+0x3754
+// 0.166456
+0x3154
+// 0.192489
+0x3229
+// 0.161820
+0x312e
+// 0.183137
+0x31dc
+// 0.038873
+0x28fa
+// 0.103665
+0x2ea2
+// 0.103497
+0x2ea0
+// 0.050063
+0x2a68
--- a/Testing/Patterns/DSP/Stats/StatsF16/InputA25_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/InputA25_f16.txt
@ -0,0 +1,152 @@
+H
+75
+// -0.959212
+0xbbac
+// -0.670324
+0xb95d
+// -2.232477
+0xc077
+// -1.387737
+0xbd8d
+// -4.329000
+0xc454
+// -1.806354
+0xbf3a
+// -2.749885
+0xc180
+// -1.850150
+0xbf67
+// -1.610971
+0xbe72
+// -3.493034
+0xc2fc
+// -2.050449
+0xc01a
+// -2.407527
+0xc0d1
+// -2.544094
+0xc117
+// -2.448739
+0xc0e6
+// -1.799126
+0xbf32
+// -1.731562
+0xbeed
+// -1.709799
+0xbed7
+// -4.335314
+0xc456
+// -2.254958
+0xc083
+// -2.192057
+0xc062
+// -1.938937
+0xbfc1
+// -2.430444
+0xc0dc
+// -4.417635
+0xc46b
+// -2.415578
+0xc0d5
+// -2.538128
+0xc114
+// -3.305833
+0xc29d
+// -3.357861
+0xc2b7
+// -2.316667
+0xc0a2
+// -2.270840
+0xc08b
+// -2.387285
+0xc0c6
+// -2.154695
+0xc04f
+// -2.140212
+0xc048
+// -1.657703
+0xbea1
+// -0.645126
+0xb929
+// -1.248875
+0xbcff
+// -1.603551
+0xbe6a
+// -2.633577
+0xc144
+// -1.835578
+0xbf58
+// -2.357818
+0xc0b7
+// -3.324743
+0xc2a6
+// -1.648598
+0xbe98
+// -2.037842
+0xc013
+// -2.109987
+0xc038
+// -1.195792
+0xbcc8
+// -2.659356
+0xc152
+// -1.980175
+0xbfec
+// -2.161604
+0xc053
+// -1.794321
+0xbf2d
+// -2.246154
+0xc07e
+// -2.408418
+0xc0d1
+// -3.953591
+0xc3e8
+// -6.752190
+0xc6c1
+// -2.878867
+0xc1c2
+// -2.157113
+0xc050
+// -2.660457
+0xc152
+// -2.162948
+0xc053
+// -3.034069
+0xc211
+// -2.095012
+0xc031
+// -4.019581
+0xc405
+// -1.844349
+0xbf61
+// -2.773310
+0xc18c
+// -2.257637
+0xc084
+// -4.031211
+0xc408
+// -2.102431
+0xc034
+// -1.073346
+0xbc4b
+// -0.878225
+0xbb07
+// -1.408060
+0xbda2
+// -2.406489
+0xc0d0
+// -1.872983
+0xbf7e
+// -2.021809
+0xc00b
+// -1.781448
+0xbf20
+// -1.769868
+0xbf14
+// -1.823862
+0xbf4c
+// -2.119634
+0xc03d
+// -4.550491
+0xc48d
--- a/Testing/Patterns/DSP/Stats/StatsF16/InputB24_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/InputB24_f16.txt
@ -0,0 +1,152 @@
+H
+75
+// 0.320653
+0x3521
+// 0.199694
+0x3264
+// 0.479652
+0x37ad
+// 0.102236
+0x2e8b
+// 0.105106
+0x2eba
+// 0.201341
+0x3271
+// 0.132813
+0x3040
+// 0.088571
+0x2dab
+// 0.058607
+0x2b80
+// 0.249883
+0x33ff
+// 0.061443
+0x2bdd
+// 0.071180
+0x2c8e
+// 0.107576
+0x2ee3
+// 0.057274
+0x2b55
+// 0.105606
+0x2ec2
+// 0.152027
+0x30dd
+// 0.103325
+0x2e9d
+// 0.049270
+0x2a4e
+// 0.220453
+0x330e
+// 0.133290
+0x3044
+// 0.105547
+0x2ec1
+// 0.084060
+0x2d61
+// 0.137416
+0x3066
+// 0.030274
+0x27c0
+// 0.048675
+0x2a3b
+// 0.127130
+0x3011
+// 0.046091
+0x29e6
+// 0.066165
+0x2c3c
+// 0.067586
+0x2c53
+// 0.097630
+0x2e40
+// 0.072033
+0x2c9c
+// 0.117393
+0x2f83
+// 0.453053
+0x3740
+// 0.081985
+0x2d3f
+// 0.464962
+0x3770
+// 0.052770
+0x2ac1
+// 0.131843
+0x3038
+// 0.057721
+0x2b63
+// 0.152657
+0x30e3
+// 0.210732
+0x32be
+// 0.038870
+0x28fa
+// 0.265057
+0x343e
+// 0.090349
+0x2dc8
+// 0.153684
+0x30eb
+// 0.057884
+0x2b69
+// 0.057020
+0x2b4c
+// 0.071140
+0x2c8e
+// 0.261764
+0x3430
+// 0.015038
+0x23b3
+// 0.081591
+0x2d39
+// 0.262766
+0x3434
+// 0.039113
+0x2902
+// 0.156950
+0x3106
+// 0.095202
+0x2e18
+// 0.025249
+0x2677
+// 0.037328
+0x28c7
+// 0.083821
+0x2d5d
+// 0.104429
+0x2eaf
+// 0.087592
+0x2d9b
+// 0.114188
+0x2f4f
+// 0.056203
+0x2b32
+// 0.004401
+0x1c82
+// 0.095480
+0x2e1c
+// 0.139157
+0x3074
+// 0.294734
+0x34b7
+// 0.268507
+0x344c
+// 0.436759
+0x36fd
+// 0.009448
+0x20d6
+// 0.081745
+0x2d3b
+// 0.168340
+0x3163
+// 0.148550
+0x30c1
+// 0.136539
+0x305f
+// 0.036541
+0x28ad
+// 0.253112
+0x340d
+// 0.165724
+0x314e
--- a/Testing/Patterns/DSP/Stats/StatsF16/InputB25_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/InputB25_f16.txt
@ -0,0 +1,152 @@
+H
+75
+// -0.550708
+0xb868
+// -1.086194
+0xbc58
+// -2.430874
+0xc0dd
+// -2.189178
+0xc061
+// -1.915799
+0xbfaa
+// -1.850925
+0xbf67
+// -1.791474
+0xbf2a
+// -1.795864
+0xbf2f
+// -1.805296
+0xbf39
+// -3.914507
+0xc3d4
+// -2.609097
+0xc138
+// -2.791088
+0xc195
+// -1.657411
+0xbea1
+// -3.818442
+0xc3a3
+// -2.085128
+0xc02c
+// -1.898179
+0xbf98
+// -5.553102
+0xc58e
+// -1.827261
+0xbf4f
+// -2.342852
+0xc0b0
+// -1.613762
+0xbe74
+// -2.609171
+0xc138
+// -1.941609
+0xbfc4
+// -3.429489
+0xc2dc
+// -3.528022
+0xc30e
+// -2.070397
+0xc024
+// -1.926029
+0xbfb4
+// -3.317714
+0xc2a3
+// -2.238273
+0xc07a
+// -1.737780
+0xbef3
+// -3.121640
+0xc23e
+// -4.527533
+0xc487
+// -2.446193
+0xc0e4
+// -2.637576
+0xc146
+// -1.176571
+0xbcb5
+// -0.474604
+0xb798
+// -3.033151
+0xc211
+// -1.835852
+0xbf58
+// -1.691092
+0xbec4
+// -1.805705
+0xbf39
+// -2.648123
+0xc14c
+// -2.160886
+0xc052
+// -2.461351
+0xc0ec
+// -1.718385
+0xbee0
+// -3.930567
+0xc3dc
+// -2.625716
+0xc140
+// -1.569582
+0xbe47
+// -1.492665
+0xbdf8
+// -2.078453
+0xc028
+// -2.391468
+0xc0c8
+// -3.195368
+0xc264
+// -2.475005
+0xc0f3
+// -1.956577
+0xbfd4
+// -2.960791
+0xc1ec
+// -2.717495
+0xc16f
+// -2.590550
+0xc12e
+// -2.850811
+0xc1b4
+// -2.901796
+0xc1ce
+// -2.290420
+0xc095
+// -2.218488
+0xc070
+// -2.282453
+0xc091
+// -2.319957
+0xc0a4
+// -2.432125
+0xc0dd
+// -2.239578
+0xc07b
+// -2.294428
+0xc097
+// -0.590827
+0xb8ba
+// -1.218422
+0xbce0
+// -1.881018
+0xbf86
+// -1.877314
+0xbf82
+// -2.526120
+0xc10d
+// -1.792010
+0xbf2b
+// -1.845023
+0xbf61
+// -1.752139
+0xbf02
+// -2.295159
+0xc097
+// -3.509155
+0xc305
+// -1.928857
+0xbfb7
--- a/Testing/Patterns/DSP/Stats/StatsF16/RefEntropy22_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/RefEntropy22_f16.txt
@ -0,0 +1,22 @@
+H
+10
+// 1.038650
+0x3c28
+// 1.946771
+0x3fc9
+// 2.139348
+0x4047
+// 2.343983
+0x40b0
+// 0.989680
+0x3beb
+// 1.881268
+0x3f86
+// 1.990067
+0x3ff6
+// 2.193480
+0x4063
+// 1.026660
+0x3c1b
+// 1.720281
+0x3ee2
--- a/Testing/Patterns/DSP/Stats/StatsF16/RefKL24_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/RefKL24_f16.txt
@ -0,0 +1,22 @@
+H
+10
+// 0.413470
+0x369e
+// 0.394708
+0x3651
+// 0.582607
+0x38a9
+// 0.396809
+0x3659
+// 0.179323
+0x31bd
+// 0.111712
+0x2f26
+// 0.515907
+0x3821
+// 0.325282
+0x3534
+// 0.039444
+0x290c
+// 0.581119
+0x38a6
--- a/Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExp23_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExp23_f16.txt
@ -0,0 +1,22 @@
+H
+10
+// 1.437131
+0x3dc0
+// 2.206211
+0x406a
+// 2.309694
+0x409f
+// 2.569599
+0x4124
+// 1.432431
+0x3dbb
+// 2.207405
+0x406a
+// 2.310898
+0x409f
+// 2.569412
+0x4124
+// 1.455066
+0x3dd2
+// 2.207080
+0x406a
--- a/Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExpDot25_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExpDot25_f16.txt
@ -0,0 +1,22 @@
+H
+10
+// -0.908800
+0xbb45
+// -1.999736
+0xc000
+// -2.257576
+0xc084
+// -2.442263
+0xc0e2
+// -1.038944
+0xbc28
+// -2.090152
+0xc02e
+// -2.287087
+0xc093
+// -2.463247
+0xc0ed
+// -1.051243
+0xbc34
+// -2.074013
+0xc026
--- a/Testing/Source/Tests/FastMathF16.cpp
+++ b/Testing/Source/Tests/FastMathF16.cpp
@ -0,0 +1,243 @@
+#include "FastMathF16.h"
+#include <stdio.h>
+#include "Error.h"
+#include "arm_vec_math_f16.h"
+#include "Test.h"
+
+
+#define SNR_THRESHOLD 60
+#define SNR_LOG_THRESHOLD 40
+
+/* 
+
+Reference patterns are generated with
+a double precision computation.
+
+*/
+#define REL_ERROR (1.0e-3)
+#define ABS_ERROR (1.0e-3)
+
+#define REL_LOG_ERROR (3.0e-2)
+#define ABS_LOG_ERROR (3.0e-2)
+
+#if 0
+    void FastMathF16::test_cos_f16()
+    {
+        const float16_t *inp  = input.ptr();
+        float16_t *outp  = output.ptr();
+        unsigned long i;
+
+        for(i=0; i < ref.nbSamples(); i++)
+        {
+          outp[i]=arm_cos_f16(inp[i]);
+        }
+
+        ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
+        ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
+
+    }
+
+    void FastMathF16::test_sin_f16()
+    {
+        const float16_t *inp  = input.ptr();
+        float16_t *outp  = output.ptr();
+        unsigned long i;
+
+        for(i=0; i < ref.nbSamples(); i++)
+        {
+          outp[i]=arm_sin_f16(inp[i]);
+        }
+
+        ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
+        ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
+
+    }
+
+#endif 
+
+    void FastMathF16::test_sqrt_f16()
+    {
+        const float16_t *inp  = input.ptr();
+        float16_t *outp  = output.ptr();
+        arm_status status;
+        unsigned long i;
+
+        for(i=0; i < ref.nbSamples(); i++)
+        {
+           status=arm_sqrt_f16(inp[i],&outp[i]);
+           ASSERT_TRUE((status == ARM_MATH_SUCCESS) || ((inp[i] < 0.0f) && (status == ARM_MATH_ARGUMENT_ERROR)));
+        }
+
+
+        ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
+        ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
+
+
+    }
+
+    void FastMathF16::test_vlog_f16()
+    {
+        const float16_t *inp  = input.ptr();
+        float16_t *outp  = output.ptr();
+
+        arm_vlog_f16(inp,outp,ref.nbSamples());
+    
+        ASSERT_SNR(ref,output,(float16_t)SNR_LOG_THRESHOLD);
+        ASSERT_CLOSE_ERROR(ref,output,ABS_LOG_ERROR,REL_LOG_ERROR);
+        ASSERT_EMPTY_TAIL(output);
+
+    }
+
+    void FastMathF16::test_vexp_f16()
+    {
+        const float16_t *inp  = input.ptr();
+        float16_t *outp  = output.ptr();
+
+        arm_vexp_f16(inp,outp,ref.nbSamples());
+    
+        ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
+        ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
+        ASSERT_EMPTY_TAIL(output);
+
+    }
+
+    void FastMathF16::test_inverse_f16()
+    {
+        const float16_t *inp  = input.ptr();
+
+        float16_t *outp  = output.ptr();
+
+        arm_vinverse_f16(inp,outp,ref.nbSamples());
+
+        ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
+        ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
+        ASSERT_EMPTY_TAIL(output);
+
+    }
+
+  
+    void FastMathF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
+    {
+        (void)paramsArgs;
+        switch(id)
+        {
+#if 0
+            case FastMathF16::TEST_COS_F16_1:
+            {
+               input.reload(FastMathF16::ANGLES1_F16_ID,mgr);
+               ref.reload(FastMathF16::COS1_F16_ID,mgr);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+
+            case FastMathF16::TEST_SIN_F16_2:
+            {
+               input.reload(FastMathF16::ANGLES1_F16_ID,mgr);
+               ref.reload(FastMathF16::SIN1_F16_ID,mgr);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+#endif 
+          
+            case FastMathF16::TEST_SQRT_F16_3:
+            {
+               input.reload(FastMathF16::SQRTINPUT1_F16_ID,mgr);
+               ref.reload(FastMathF16::SQRT1_F16_ID,mgr);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+
+            case FastMathF16::TEST_VLOG_F16_4:
+            {
+               input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr);
+               ref.reload(FastMathF16::LOG1_F16_ID,mgr);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+
+            case FastMathF16::TEST_VLOG_F16_5:
+            {
+               input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr,7);
+               ref.reload(FastMathF16::LOG1_F16_ID,mgr,7);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+
+            case FastMathF16::TEST_VLOG_F16_6:
+            {
+               input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr,16);
+               ref.reload(FastMathF16::LOG1_F16_ID,mgr,16);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+
+            case FastMathF16::TEST_VLOG_F16_7:
+            {
+               input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr,23);
+               ref.reload(FastMathF16::LOG1_F16_ID,mgr,23);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+
+            case FastMathF16::TEST_VEXP_F16_8:
+            {
+              
+              input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr);
+              ref.reload(FastMathF16::EXP1_F16_ID,mgr);
+              output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+
+            case FastMathF16::TEST_VEXP_F16_9:
+            {
+               input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr,7);
+               ref.reload(FastMathF16::EXP1_F16_ID,mgr,7);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+
+            case FastMathF16::TEST_VEXP_F16_10:
+            {
+               input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr,16);
+               ref.reload(FastMathF16::EXP1_F16_ID,mgr,16);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+
+            case FastMathF16::TEST_VEXP_F16_11:
+            {
+               input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr,23);
+               ref.reload(FastMathF16::EXP1_F16_ID,mgr,23);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+
+            case FastMathF16::TEST_INVERSE_F16_12:
+            {
+               input.reload(FastMathF16::INPUT1_F16_ID,mgr);
+               ref.reload(FastMathF16::INVERSE1_F16_ID,mgr);
+               output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
+
+            }
+            break;
+        }
+        
+    }
+
+    void FastMathF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
+    {
+      (void)id;
+      output.dump(mgr);
+      
+    }
--- a/Testing/Source/Tests/StatsTestsF16.cpp
+++ b/Testing/Source/Tests/StatsTestsF16.cpp
@ -4,14 +4,18 @@
 #include "Test.h"


-#define SNR_THRESHOLD 50
+#define SNR_THRESHOLD 48
+#define SNR_KULLBACK_THRESHOLD 40
 /* 

 Reference patterns are generated with
 a double precision computation.

 */
-#define REL_ERROR (3.0e-3)
+#define REL_ERROR (6.0e-3)
+
+#define REL_KULLBACK_ERROR (5.0e-3)
+#define ABS_KULLBACK_ERROR (5.0e-3)

    void StatsTestsF16::test_max_f16()
    {
@ -39,7 +43,7 @@ a double precision computation.

    }

-#if 0
+
    void StatsTestsF16::test_max_no_idx_f16()
    {
        const float16_t *inp  = inputA.ptr();
@ -59,7 +63,7 @@ a double precision computation.
        ASSERT_EQ(result,refp[this->refOffset]);

    }
-#endif 
+

    void StatsTestsF16::test_min_f16()
    {
@ -241,7 +245,7 @@ a double precision computation.

    }

-#if 0
+
    void StatsTestsF16::test_entropy_f16()
    {
      const float16_t *inp  = inputA.ptr();
@ -298,9 +302,9 @@ a double precision computation.
         inpB += dimsp[i+1];
      }

-      ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
+      ASSERT_SNR(ref,output,(float16_t)SNR_KULLBACK_THRESHOLD);

-      ASSERT_REL_ERROR(ref,output,REL_ERROR);
+      ASSERT_CLOSE_ERROR(ref,output,ABS_KULLBACK_ERROR,REL_KULLBACK_ERROR);
    } 

    void StatsTestsF16::test_logsumexp_dot_prod_f16()
@ -325,7 +329,7 @@ a double precision computation.
      ASSERT_REL_ERROR(ref,output,REL_ERROR);
    } 

-  #endif
+
  
    void StatsTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
    {
@ -595,7 +599,7 @@ a double precision computation.
               refOffset = 2;
            }
            break;
-#if 0
+
            case StatsTestsF16::TEST_ENTROPY_F16_22:
            {
               inputA.reload(StatsTestsF16::INPUT22_F16_ID,mgr);
@ -651,7 +655,7 @@ a double precision computation.

            case StatsTestsF16::TEST_MAX_NO_IDX_F16_26:
            {
-               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,3);
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,7);
              
               ref.reload(StatsTestsF16::MAXVALS_F16_ID,mgr);
               
@ -684,7 +688,7 @@ a double precision computation.
               refOffset = 2;
            }
            break;
-#endif
+
            case TEST_MEAN_F16_29:
               inputA.reload(StatsTestsF16::INPUT2_F16_ID,mgr,100);
              
--- a/Testing/desc_f16.txt
+++ b/Testing/desc_f16.txt
@ -25,23 +25,23 @@ group Root {
              Pattern STDVALS_F16_ID : StdVals6_f16.txt
              Pattern VARVALS_F16_ID : VarVals7_f16.txt

-              //Pattern INPUT22_F16_ID : Input22_f16.txt 
-              //Pattern DIM22_S16_ID : Dims22_s16.txt 
-              //Pattern REF22_ENTROPY_F16_ID : RefEntropy22_f16.txt
-              //
-              //Pattern INPUT23_F16_ID : Input23_f16.txt 
-              //Pattern DIM23_S16_ID : Dims23_s16.txt 
-              //Pattern REF23_LOGSUMEXP_F16_ID : RefLogSumExp23_f16.txt
-              //
-              //Pattern INPUTA24_F16_ID : InputA24_f16.txt 
-              //Pattern INPUTB24_F16_ID : InputB24_f16.txt
-              //Pattern DIM24_S16_ID : Dims24_s16.txt 
-              //Pattern REF24_KL_F16_ID : RefKL24_f16.txt
-              //
-              //Pattern INPUTA25_F16_ID : InputA25_f16.txt 
-              //Pattern INPUTB25_F16_ID : InputB25_f16.txt
-              //Pattern DIM25_S16_ID : Dims25_s16.txt 
-              //Pattern REF25_LOGSUMEXP_DOT_F16_ID : RefLogSumExpDot25_f16.txt
+              Pattern INPUT22_F16_ID : Input22_f16.txt 
+              Pattern DIM22_S16_ID : Dims22_s16.txt 
+              Pattern REF22_ENTROPY_F16_ID : RefEntropy22_f16.txt
+              
+              Pattern INPUT23_F16_ID : Input23_f16.txt 
+              Pattern DIM23_S16_ID : Dims23_s16.txt 
+              Pattern REF23_LOGSUMEXP_F16_ID : RefLogSumExp23_f16.txt
+              
+              Pattern INPUTA24_F16_ID : InputA24_f16.txt 
+              Pattern INPUTB24_F16_ID : InputB24_f16.txt
+              Pattern DIM24_S16_ID : Dims24_s16.txt 
+              Pattern REF24_KL_F16_ID : RefKL24_f16.txt
+              
+              Pattern INPUTA25_F16_ID : InputA25_f16.txt 
+              Pattern INPUTB25_F16_ID : InputB25_f16.txt
+              Pattern DIM25_S16_ID : Dims25_s16.txt 
+              Pattern REF25_LOGSUMEXP_DOT_F16_ID : RefLogSumExpDot25_f16.txt

              Output  OUT_F16_ID : Output
              Output  OUT_S16_ID : Index
@ -76,14 +76,14 @@ group Root {
                Test nb=8n   arm_var_f16:test_var_f16
                Test nb=8n+1 arm_var_f16:test_var_f16

-                disabled{arm_entropy_f16:test_entropy_f16}
-                disabled{arm_logsumexp_f16:test_logsumexp_f16}
-                disabled{arm_kullback_leibler_f16:test_kullback_leibler_f16}
-                disabled{arm_logsumexp_dot_prod_f16:test_logsumexp_dot_prod_f16}
+                arm_entropy_f16:test_entropy_f16
+                arm_logsumexp_f16:test_logsumexp_f16
+                arm_kullback_leibler_f16:test_kullback_leibler_f16
+                arm_logsumexp_dot_prod_f16:test_logsumexp_dot_prod_f16

-                disabled{Test nb=7    arm_max_no_idx_f16:test_max_no_idx_f16}
-                disabled{Test nb=8n   arm_max_no_idx_f16:test_max_no_idx_f16}
-                disabled{Test nb=8n+1 arm_max_no_idx_f16:test_max_no_idx_f16}
+                Test nb=7    arm_max_no_idx_f16:test_max_no_idx_f16
+                Test nb=8n   arm_max_no_idx_f16:test_max_no_idx_f16
+                Test nb=8n+1 arm_max_no_idx_f16:test_max_no_idx_f16

                Test long  arm_mean_f16:test_mean_f16
                Test long  arm_rms_f16:test_rms_f16
@ -342,6 +342,46 @@ group Root {

        }

+        group Fast Maths Tests {
+          class = FastMath
+          folder = FastMath 
+
+          suite Fast Maths F16 {
+              class = FastMathF16
+              folder = FastMathF16
+
+              Pattern ANGLES1_F16_ID : Angles1_f16.txt
+              Pattern SQRTINPUT1_F16_ID : SqrtInput1_f16.txt
+              Pattern LOGINPUT1_F16_ID : LogInput1_f16.txt
+              Pattern EXPINPUT1_F16_ID : ExpInput1_f16.txt
+              Pattern INPUT1_F16_ID : Samples1_f16.txt
+
+              Pattern COS1_F16_ID : Cos1_f16.txt 
+              Pattern SIN1_F16_ID : Sin1_f16.txt 
+              Pattern SQRT1_F16_ID : Sqrt1_f16.txt 
+              Pattern LOG1_F16_ID : Log1_f16.txt 
+              Pattern EXP1_F16_ID : Exp1_f16.txt 
+              Pattern INVERSE1_F16_ID : Inverse1_f16.txt
+
+              Output  OUT_F16_ID : Output
+
+              Functions {
+                disabled{test_cos_f16:test_cos_f16}
+                disabled{test_sin_f16:test_sin_f16}
+                test_sqrt_f16:test_sqrt_f16
+                test_vlog_f16:test_vlog_f16
+                test_vlog_f16 nb=3:test_vlog_f16
+                test_vlog_f16 nb=4n:test_vlog_f16
+                test_vlog_f16 nb=4n+1:test_vlog_f16
+                test_vexp_f16:test_vexp_f16
+                test_vexp_f16 nb=3:test_vexp_f16
+                test_vexp_f16 nb=4n:test_vexp_f16
+                test_vexp_f16 nb=4n+1:test_vexp_f16
+                test_inverse_f16:test_inverse_f16
+              }
+          }
+        }
+
        group Filtering Tests {
           class = FilteringTests
           folder = Filtering