CMSIS-DSP: Added f16 versions of statistics functions.

6 years ago · 51be1901ce
parent 71218873eb
commit 51be1901ce
29 changed files with 3561 additions and 15 deletions
--- a/Include/dsp/statistics_functions_f16.h
+++ b/Include/dsp/statistics_functions_f16.h
@ -26,12 +26,104 @@
 #ifndef _STATISTICS_FUNCTIONS_F16_H_
 #define _STATISTICS_FUNCTIONS_F16_H_

+#include "arm_math_types_f16.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/basic_math_functions_f16.h"
+#include "dsp/fast_math_functions_f16.h"
+
 #ifdef   __cplusplus
 extern "C"
 {
 #endif

 #if defined(ARM_FLOAT16_SUPPORTED)
+
+ /**
+   * @brief  Sum of the squares of the elements of a half-precision (float16_t) vector.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  blockSize  number of samples in the input vector
+   * @param[out] pResult    sum of the squares is returned here
+   */
+  void arm_power_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+ /**
+   * @brief  Mean value of a half-precision (float16_t) vector.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  blockSize  number of samples in the input vector
+   * @param[out] pResult    mean value is returned here
+   */
+  void arm_mean_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+  /**
+   * @brief  Variance of the elements of a half-precision (float16_t) vector.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  blockSize  number of samples in the input vector
+   * @param[out] pResult    variance is returned here
+   */
+  void arm_var_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+ /**
+   * @brief  Root Mean Square of the elements of a half-precision (float16_t) vector.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  blockSize  number of samples in the input vector
+   * @param[out] pResult    root mean square value is returned here
+   */
+  void arm_rms_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+ /**
+   * @brief  Standard deviation of the elements of a half-precision (float16_t) vector.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  blockSize  number of samples in the input vector
+   * @param[out] pResult    standard deviation is returned here
+   */
+  void arm_std_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult);
+
+ /**
+   * @brief  Minimum value of a half-precision (float16_t) vector.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  blockSize  number of samples in the input vector
+   * @param[out] pResult    minimum value is returned here
+   * @param[out] pIndex     index of the minimum value in the input vector is returned here
+   */
+  void arm_min_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of a half-precision (float16_t) vector.
+ * @param[in]  pSrc       points to the input vector
+ * @param[in]  blockSize  number of samples in the input vector
+ * @param[out] pResult    maximum value is returned here
+ * @param[out] pIndex     index of the maximum value in the input vector is returned here
+ */
+  void arm_max_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex);
+
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
--- a/PythonWrapper/setup.py
+++ b/PythonWrapper/setup.py
@ -38,6 +38,7 @@ matrix.remove(os.path.join(ROOT,"Source","MatrixFunctions","MatrixFunctionsF16.c

 statistics = glob.glob(os.path.join(ROOT,"Source","StatisticsFunctions","*.c"))
 statistics.remove(os.path.join(ROOT,"Source","StatisticsFunctions","StatisticsFunctions.c"))
+statistics.remove(os.path.join(ROOT,"Source","StatisticsFunctions","StatisticsFunctionsF16.c"))

 complexf = glob.glob(os.path.join(ROOT,"Source","ComplexMathFunctions","*.c"))
 complexf.remove(os.path.join(ROOT,"Source","ComplexMathFunctions","ComplexMathFunctions.c"))
--- a/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
+++ b/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c
@ -52,7 +52,6 @@
 */
 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

-#include <stdio.h>
 #include "arm_helium_utils.h"

 #define DONTCARE            0 /* inactive lane content */
--- a/Source/StatisticsFunctions/CMakeLists.txt
+++ b/Source/StatisticsFunctions/CMakeLists.txt
@ -5,9 +5,43 @@ project(CMSISDSPStatistics)
 include(configLib)
 include(configDsp)

-file(GLOB SRC "./*_*.c")

-add_library(CMSISDSPStatistics STATIC ${SRC})
+
+add_library(CMSISDSPStatistics STATIC)
+
+# Sources that are always part of the library, listed explicitly
+# (one file per line, same order as they are handed to the target).
+target_sources(CMSISDSPStatistics PRIVATE
+  arm_entropy_f32.c
+  arm_entropy_f64.c
+  arm_kullback_leibler_f32.c
+  arm_kullback_leibler_f64.c
+  arm_logsumexp_dot_prod_f32.c
+  arm_logsumexp_f32.c
+  arm_max_f32.c
+  arm_max_no_idx_f32.c
+  arm_max_q15.c
+  arm_max_q31.c
+  arm_max_q7.c
+  arm_mean_f32.c
+  arm_mean_q15.c
+  arm_mean_q31.c
+  arm_mean_q7.c
+  arm_min_f32.c
+  arm_min_q15.c
+  arm_min_q31.c
+  arm_min_q7.c
+  arm_power_f32.c
+  arm_power_q15.c
+  arm_power_q31.c
+  arm_power_q7.c
+  arm_rms_f32.c
+  arm_rms_q15.c
+  arm_rms_q31.c
+  arm_std_f32.c
+  arm_std_q15.c
+  arm_std_q31.c
+  arm_var_f32.c
+  arm_var_q15.c
+  arm_var_q31.c
+)
+

 configLib(CMSISDSPStatistics ${ROOT})
 configDsp(CMSISDSPStatistics ${ROOT})
@ -17,3 +51,12 @@ target_include_directories(CMSISDSPStatistics PUBLIC "${DSP}/Include")



+# The f16 kernels are only added when the build is not for ARMAC5 and
+# DISABLEFLOAT16 has not been set.
+if (NOT ARMAC5 AND NOT DISABLEFLOAT16)
+  target_sources(CMSISDSPStatistics PRIVATE
+    arm_max_f16.c
+    arm_min_f16.c
+    arm_mean_f16.c
+    arm_power_f16.c
+    arm_rms_f16.c
+    arm_std_f16.c
+    arm_var_f16.c
+  )
+endif()
--- a/Source/StatisticsFunctions/StatisticsFunctionsF16.c
+++ b/Source/StatisticsFunctions/StatisticsFunctionsF16.c
@ -0,0 +1,35 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        StatisticsFunctionsF16.c
+ * Description:  Combination of all statistics function source files.
+ *
+ * $Date:        16. March 2020
+ * $Revision:    V1.1.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_max_f16.c"
+#include "arm_min_f16.c"
+#include "arm_mean_f16.c"
+#include "arm_power_f16.c"
+#include "arm_rms_f16.c"
+#include "arm_std_f16.c"
+#include "arm_var_f16.c"
--- a/Source/StatisticsFunctions/arm_max_f16.c
+++ b/Source/StatisticsFunctions/arm_max_f16.c
@ -0,0 +1,246 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_max_f16.c
+ * Description:  Maximum value of a floating-point vector
+ *
+ * $Date:        18. March 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup Max
+  @{
+ */
+
+/**
+  @brief         Maximum value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+void arm_max_f16(
+  const float16_t * pSrc,
+  uint32_t blockSize,
+  float16_t * pResult,
+  uint32_t * pIndex)
+{
+    /*
+     * Helium (MVE) implementation.
+     *
+     * Full groups of 8 samples are handled with vectors: each lane keeps
+     * the largest value it has seen and the index at which it saw it.
+     * The lanes are then reduced to a single maximum and the smallest
+     * index holding that maximum. Left-over samples (blockSize & 7) are
+     * handled by a scalar loop.
+     *
+     * Indices are tracked in 16-bit lanes (uint16x8_t), so block sizes
+     * that do not fit in 16 bits are not handled by the index logic.
+     */
+    int32_t blkCnt;                 /* loop counter */
+    f16x8_t vecSrc;                 /* current group of 8 samples */
+    f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN);   /* per-lane running maximum */
+    float16_t maxValue = F16_MIN;   /* overall maximum */
+    uint32_t idx = blockSize;       /* index of the overall maximum */
+    uint16x8_t indexVec;            /* indices of the samples in vecSrc */
+    uint16x8_t curExtremIdxVec;     /* per-lane index of the running maximum */
+    uint32_t curIdx = 0;            /* write-back state of the index generator */
+    mve_pred16_t p0;
+    float16_t tmp;
+
+
+    indexVec = vidupq_wb_u16(&curIdx, 1);
+    curExtremIdxVec = vdupq_n_u16(0);
+
+    /* Process 8 samples per iteration */
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0)
+    {
+        vecSrc = vldrhq_f16(pSrc);
+        /*
+         * Get current max per lane and current index per lane
+         * when a max is selected.
+         *
+         * The comparison is strict so that, when the same maximum shows
+         * up again later in a lane, the earlier index is kept. This
+         * matches the strict '<' used by the tail loop below and by the
+         * non-vectorized implementation (first occurrence wins).
+         */
+        p0 = vcmpgtq(vecSrc, curExtremValVec);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+
+        indexVec = vidupq_wb_u16(&curIdx, 1);
+
+        pSrc += 8;
+        /* Decrement the loop counter */
+        blkCnt--;
+    }
+
+
+    /*
+     * Get max value across the vector
+     */
+    maxValue = vmaxnmvq(maxValue, curExtremValVec);
+    /*
+     * Lanes that do not hold the maximum get blockSize as index so that
+     * they cannot win the minimum-index reduction below
+     */
+    p0 = vcmpgeq(curExtremValVec, maxValue);
+    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
+    /*
+     * Smallest index among the lanes holding the maximum
+     */
+    idx = vminvq(idx, indexVec);
+
+    /* Tail: remaining (blockSize & 7) samples, one at a time */
+    blkCnt = blockSize & 7;
+
+    while (blkCnt > 0)
+    {
+      /* Load the next sample */
+      tmp = *pSrc++;
+
+      /* Strict comparison: an equal value found earlier keeps its index */
+      if (maxValue < tmp)
+      {
+        /* Update the maximum value and its index */
+        maxValue = tmp;
+        idx = blockSize - blkCnt;
+      }
+
+      /* Decrement loop counter */
+      blkCnt--;
+    }
+
+    /*
+     * Save result
+     */
+    *pIndex = idx;
+    *pResult = maxValue;
+}
+
+#else
+void arm_max_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{
+        float16_t maxVal, out;                         /* Current sample and largest value found so far */
+        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the largest value found so far */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+        uint32_t index;                                /* Index of the sample just before the current group of 4 */
+#endif
+
+  /* The first sample (index 0) is the initial maximum. */
+  outIndex = 0U;
+
+  /* Load the first input value as the reference for comparison (pSrc[0] is read unconditionally, so blockSize >= 1 is assumed) */
+  out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+  /* Sample 0 is already consumed; the first group covers indices 1..4. */
+  index = 0U;
+
+  /* Loop unrolling: process 4 samples per iteration */
+  blkCnt = (blockSize - 1U) >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* Load the next sample */
+    maxVal = *pSrc++;
+
+    /* Strict comparison: on equal values the earlier index is kept */
+    if (out < maxVal)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal;
+      outIndex = index + 1U;
+    }
+
+    maxVal = *pSrc++;
+    if (out < maxVal)
+    {
+      out = maxVal;
+      outIndex = index + 2U;
+    }
+
+    maxVal = *pSrc++;
+    if (out < maxVal)
+    {
+      out = maxVal;
+      outIndex = index + 3U;
+    }
+
+    maxVal = *pSrc++;
+    if (out < maxVal)
+    {
+      out = maxVal;
+      outIndex = index + 4U;
+    }
+
+    index += 4U;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: samples left over after the groups of 4 */
+  blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+  /* Every sample except the first one is handled by the loop below */
+  blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* Load the next sample */
+    maxVal = *pSrc++;
+
+    /* Strict comparison: on equal values the earlier index is kept */
+    if (out < maxVal)
+    {
+      /* Update the maximum value; blkCnt samples remain, so this sample sits at blockSize - blkCnt */
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of Max group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_mean_f16.c
+++ b/Source/StatisticsFunctions/arm_mean_f16.c
@ -0,0 +1,152 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mean_f16.c
+ * Description:  Mean value of a floating-point vector
+ *
+ * $Date:        18. March 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup mean Mean
+
+  Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
+  The following algorithm is used:
+
+  <pre>
+      Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
+  </pre>
+
+  There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+  @addtogroup mean
+  @{
+ */
+
+/**
+  @brief         Mean value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector.
+  @param[in]     blockSize  number of samples in input vector.
+  @param[out]    pResult    mean value returned here.
+  @return        none
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+void arm_mean_f16(
+  const float16_t * pSrc,
+  uint32_t blockSize,
+  float16_t * pResult)
+{
+    int32_t  blkCnt;           /* samples still to process; signed because the last subtraction can go below zero */
+    f16x8_t vecSrc;            /* current group of up to 8 samples */
+    f16x8_t sumVec = vdupq_n_f16(0.0f16);   /* one partial sum per lane */
+    /* Tail-predicated loop: the last, possibly partial, group is handled by the same code. */
+    blkCnt = blockSize;
+    do {
+        mve_pred16_t p = vctp16q(blkCnt);   /* predicate for the lanes that still hold valid samples */
+        /* Predicated load and add: lanes outside the predicate do not change their partial sum. */
+        vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p);
+        sumVec = vaddq_m_f16(sumVec, sumVec, vecSrc, p);
+        /* Advance by one vector of 8 halfwords. */
+        blkCnt -= 8;
+        pSrc += 8;
+    }
+    while (blkCnt > 0);
+    /* vecAddAcrossF16Mve comes from arm_helium_utils.h; presumably it adds the lanes together -- TODO confirm. */
+    *pResult = vecAddAcrossF16Mve(sumVec) / (float16_t) blockSize;
+}
+
+
+#else
+
+void arm_mean_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+        float16_t acc = 0.0f;                          /* Running sum of the samples */
+        uint32_t remaining;                            /* Samples left for the one-by-one loop */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+        uint32_t groups;                               /* Groups of 4 samples left to accumulate */
+
+  /* Unrolled part: add four samples per iteration, in input order */
+  for (groups = blockSize >> 2U; groups > 0U; groups--)
+  {
+    acc += pSrc[0];
+    acc += pSrc[1];
+    acc += pSrc[2];
+    acc += pSrc[3];
+
+    pSrc += 4;
+  }
+
+  /* Samples not covered by the unrolled loop */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  /* Add the remaining samples one at a time */
+  for (; remaining > 0U; remaining--)
+  {
+    acc += *pSrc++;
+  }
+
+  /* Result = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+  *pResult = (acc / (float16_t)blockSize);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of mean group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_mean_f32.c
+++ b/Source/StatisticsFunctions/arm_mean_f32.c
@ -32,18 +32,6 @@
  @ingroup groupStats
 */

-/**
-  @defgroup mean Mean
-
-  Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
-  The underlying algorithm is used:
-
-  <pre>
-      Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
-  </pre>
-
-  There are separate functions for floating-point, Q31, Q15, and Q7 data types.
- */

 /**
  @addtogroup mean
--- a/Source/StatisticsFunctions/arm_min_f16.c
+++ b/Source/StatisticsFunctions/arm_min_f16.c
@ -0,0 +1,240 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_min_f16.c
+ * Description:  Minimum value of a floating-point vector
+ *
+ * $Date:        18. March 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup Min
+  @{
+ */
+
+/**
+  @brief         Minimum value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @param[out]    pIndex     index of minimum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+void arm_min_f16(
+  const float16_t * pSrc,
+  uint32_t blockSize,
+  float16_t * pResult,
+  uint32_t * pIndex)
+{
+    /*
+     * Helium (MVE) implementation.
+     *
+     * Each lane keeps the smallest value it has seen and the index at
+     * which it saw it. Full groups of 8 samples use unpredicated
+     * vector operations; the left-over (blockSize & 7) samples use a
+     * predicated load and compare. The lanes are then reduced to a
+     * single minimum and the smallest index holding that minimum.
+     *
+     * Indices are tracked in 16-bit lanes (uint16x8_t), so block sizes
+     * that do not fit in 16 bits are not handled by the index logic.
+     */
+    int32_t  blkCnt;           /* loop counter */
+    f16x8_t vecSrc;            /* current group of up to 8 samples */
+    float16_t const *pSrcVec;  /* read pointer into the input */
+    f16x8_t curExtremValVec = vdupq_n_f16(F16_MAX);   /* per-lane running minimum */
+    float16_t minValue = F16_MAX;                     /* overall minimum */
+    uint32_t  idx = blockSize;                        /* index of the overall minimum */
+    uint16x8_t indexVec;                              /* indices of the samples in vecSrc */
+    uint16x8_t curExtremIdxVec;                       /* per-lane index of the running minimum */
+    mve_pred16_t p0;
+
+    indexVec = vidupq_u16((uint32_t)0, 1);
+    curExtremIdxVec = vdupq_n_u16(0);
+
+    pSrcVec = (float16_t const *) pSrc;
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0)
+    {
+        vecSrc = vldrhq_f16(pSrcVec);  pSrcVec += 8;
+        /*
+         * Get current min per lane and current index per lane
+         * when a min is selected.
+         *
+         * The comparison is strict so that, when the same minimum shows
+         * up again later in a lane, the earlier index is kept. This
+         * matches the strict '>' used by the non-vectorized
+         * implementation (first occurrence wins).
+         */
+        p0 = vcmpltq(vecSrc, curExtremValVec);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+
+        indexVec = indexVec + 8;
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * Tail: remaining (blockSize & 7) samples, handled with predication
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0)
+    {
+        p0 = vctp16q(blkCnt);
+        /*
+         * Predicated load: only the lanes that correspond to remaining
+         * samples are read, so nothing past the end of the input is
+         * accessed. Inactive lanes are zeroed and are excluded from the
+         * comparison by the same predicate.
+         */
+        vecSrc = vldrhq_z_f16(pSrcVec, p0);
+        /*
+         * Get current min per lane and current index per lane
+         * when a min is selected
+         */
+        p0 = vcmpltq_m(vecSrc, curExtremValVec, p0);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+    }
+    /*
+     * Get min value across the vector
+     */
+    minValue = vminnmvq(minValue, curExtremValVec);
+    /*
+     * Lanes that do not hold the minimum get blockSize as index so that
+     * they cannot win the minimum-index reduction below
+     */
+    p0 = vcmpleq(curExtremValVec, minValue);
+    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
+    /*
+     * Smallest index among the lanes holding the minimum
+     */
+    idx = vminvq(idx, indexVec);
+    /*
+     * Save result
+     */
+    *pIndex = idx;
+    *pResult = minValue;
+}
+
+#else
+
+void arm_min_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{
+        float16_t minVal, out;                         /* Current sample and smallest value found so far */
+        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the smallest value found so far */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+        uint32_t index;                                /* Index of the sample just before the current group of 4 */
+#endif
+
+  /* The first sample (index 0) is the initial minimum. */
+  outIndex = 0U;
+
+  /* Load the first input value as the reference for comparison (pSrc[0] is read unconditionally, so blockSize >= 1 is assumed) */
+  out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+  /* Sample 0 is already consumed; the first group covers indices 1..4. */
+  index = 0U;
+
+  /* Loop unrolling: process 4 samples per iteration */
+  blkCnt = (blockSize - 1U) >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* Load the next sample */
+    minVal = *pSrc++;
+
+    /* Strict comparison: on equal values the earlier index is kept */
+    if (out > minVal)
+    {
+      /* Update the minimum value and its index */
+      out = minVal;
+      outIndex = index + 1U;
+    }
+
+    minVal = *pSrc++;
+    if (out > minVal)
+    {
+      out = minVal;
+      outIndex = index + 2U;
+    }
+
+    minVal = *pSrc++;
+    if (out > minVal)
+    {
+      out = minVal;
+      outIndex = index + 3U;
+    }
+
+    minVal = *pSrc++;
+    if (out > minVal)
+    {
+      out = minVal;
+      outIndex = index + 4U;
+    }
+
+    index += 4U;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: samples left over after the groups of 4 */
+  blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+  /* Every sample except the first one is handled by the loop below */
+  blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* Load the next sample */
+    minVal = *pSrc++;
+
+    /* Strict comparison: on equal values the earlier index is kept */
+    if (out > minVal)
+    {
+      /* Update the minimum value; blkCnt samples remain, so this sample sits at blockSize - blkCnt */
+      out = minVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of Min group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_power_f16.c
+++ b/Source/StatisticsFunctions/arm_power_f16.c
@ -0,0 +1,152 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_power_f16.c
+ * Description:  Sum of the squares of the elements of a floating-point vector
+ *
+ * $Date:        18. March 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+/**
+  @ingroup groupStats
+ */
+
+
+
+/**
+  @addtogroup power
+  @{
+ */
+
+/**
+  @brief         Sum of the squares of the elements of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    sum of the squares value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+void arm_power_f16(
+  const float16_t * pSrc,
+  uint32_t blockSize,
+  float16_t * pResult)
+{
+    int32_t         blkCnt;     /* samples still to process; signed because the last subtraction can go below zero */
+    f16x8_t         vecSrc;     /* current group of up to 8 samples */
+    f16x8_t         sumVec = vdupq_n_f16(0.0f);   /* one partial sum of squares per lane */
+
+    /* Tail-predicated loop: the last, possibly partial, group is handled by the same code. */
+    blkCnt = blockSize;
+    do {
+        mve_pred16_t    p = vctp16q(blkCnt);   /* predicate for the lanes that still hold valid samples */
+        /* Predicated load: lanes outside the predicate are not read from memory. */
+        vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p);
+        /*
+         * per lane: sumVec += vecSrc * vecSrc (active lanes only)
+         */
+        sumVec = vfmaq_m(sumVec, vecSrc, vecSrc, p);
+        /* Advance by one vector of 8 halfwords. */
+        blkCnt -= 8;
+        pSrc += 8;
+    }
+    while (blkCnt > 0);
+    /* vecAddAcrossF16Mve comes from arm_helium_utils.h; presumably it adds the lanes together -- TODO confirm. */
+    *pResult = vecAddAcrossF16Mve(sumVec);
+}
+#else
+
+void arm_power_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+        float16_t acc = 0.0f;                          /* Running sum of the squared samples */
+        float16_t x;                                   /* Sample currently being squared */
+        uint32_t remaining;                            /* Samples left for the one-by-one loop */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+        uint32_t groups;                               /* Groups of 4 samples left to accumulate */
+
+  /* Unrolled part: square and add four samples per iteration, in input order */
+  for (groups = blockSize >> 2U; groups > 0U; groups--)
+  {
+    x = pSrc[0];
+    acc += x * x;
+
+    x = pSrc[1];
+    acc += x * x;
+
+    x = pSrc[2];
+    acc += x * x;
+
+    x = pSrc[3];
+    acc += x * x;
+
+    pSrc += 4;
+  }
+
+  /* Samples not covered by the unrolled loop */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  /* Square and add the remaining samples one at a time */
+  for (; remaining > 0U; remaining--)
+  {
+    x = *pSrc++;
+    acc += x * x;
+  }
+
+  /* Result = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+  *pResult = acc;
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of power group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_rms_f16.c
+++ b/Source/StatisticsFunctions/arm_rms_f16.c
@ -0,0 +1,147 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_rms_f16.c
+ * Description:  Root mean square value of the elements of a floating-point vector
+ *
+ * $Date:        18. March 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup RMS Root mean square (RMS)
+
+  Calculates the Root Mean Square of the elements in the input vector.
+  The following algorithm is used:
+
+  <pre>
+      Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
+  </pre>
+
+  There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+  @addtogroup RMS
+  @{
+ */
+
+/**
+  @brief         Root Mean Square of the elements of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    root mean square value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+void arm_rms_f16(
+  const float16_t * pSrc,
+  uint32_t blockSize,
+  float16_t * pResult)
+{
+    /*
+     * Helium build: the sum of squares is delegated to arm_power_f16;
+     * only the division by blockSize and the square root are done here.
+     */
+    float16_t sumOfSquares = 0.0f;
+    float16_t meanSquare;
+
+    arm_power_f16(pSrc, blockSize, &sumOfSquares);
+
+    /* Mean of the squares, then its square root written to *pResult */
+    meanSquare = sumOfSquares / (float16_t) blockSize;
+    arm_sqrt_f16(meanSquare, pResult);
+}
+#else
+
+void arm_rms_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+  /*
+   * Scalar implementation: accumulates the squares of all samples in input
+   * order, divides the total by blockSize and passes the quotient to
+   * arm_sqrt_f16, which writes the result to *pResult.
+   */
+  const float16_t *pIn = pSrc;   /* Read cursor over the input vector */
+  float16_t acc = 0.0f;          /* Running sum of squares */
+  float16_t sample;              /* Sample currently being squared */
+  uint32_t count;                /* Iterations left in the active loop */
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+  /* Unrolled pass: four samples per iteration, accumulated in input order */
+  for (count = blockSize / 4U; count != 0U; count--)
+  {
+    sample = pIn[0];
+    acc += sample * sample;
+
+    sample = pIn[1];
+    acc += sample * sample;
+
+    sample = pIn[2];
+    acc += sample * sample;
+
+    sample = pIn[3];
+    acc += sample * sample;
+
+    pIn += 4;
+  }
+
+  /* Between 0 and 3 samples are left for the tail loop */
+  count = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the tail loop processes every sample */
+  count = blockSize;
+
+#endif /* defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+  /* Tail loop: one sample per iteration */
+  for (; count != 0U; count--)
+  {
+    sample = *pIn++;
+    acc += (sample * sample);
+  }
+
+  /* RMS = sqrt(sum of squares / blockSize) */
+  arm_sqrt_f16(acc / (float16_t) blockSize, pResult);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of RMS group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_std_f16.c
+++ b/Source/StatisticsFunctions/arm_std_f16.c
@ -0,0 +1,67 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_std_f16.c
+ * Description:  Standard deviation of the elements of a floating-point vector
+ *
+ * $Date:        18. March 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+/**
+  @ingroup groupStats
+ */
+
+
+
+/**
+  @addtogroup STD
+  @{
+ */
+
+/**
+  @brief         Standard deviation of the elements of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    standard deviation value returned here
+  @return        none
+ */
+void arm_std_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+  float16_t variance;   /* Filled in by arm_var_f16 */
+
+  /* Standard deviation is the square root of the variance */
+  arm_var_f16(pSrc, blockSize, &variance);
+
+  /* arm_sqrt_f16 writes the root directly to the caller's output */
+  arm_sqrt_f16(variance, pResult);
+}
+
+/**
+  @} end of STD group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_var_f16.c
+++ b/Source/StatisticsFunctions/arm_var_f16.c
@ -0,0 +1,221 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_var_f16.c
+ * Description:  Variance of the elements of a floating-point vector
+ *
+ * $Date:        18. March 2020
+ * $Revision:    V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#include <stdio.h>
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup variance
+  @{
+ */
+
+/**
+  @brief         Variance of the elements of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    variance value returned here
+  @return        none
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+
+/*
+ * Helium (MVE) implementation of the f16 variance.
+ *
+ * Two passes over the input: arm_mean_f16 computes the mean, then the loop
+ * below accumulates (x - mean)^2 in eight half-precision lanes. The lane
+ * sums are combined by vecAddAcrossF16Mve and divided by (blockSize - 1).
+ * For blockSize <= 1 the result is 0 and the input is not read.
+ */
+void arm_var_f16(
+           const float16_t * pSrc,
+                 uint32_t blockSize,
+                 float16_t * pResult)
+{
+    /* Signed so that the "blkCnt > 0" loop test ends once the count goes negative */
+    int32_t         blkCnt;     /* loop counters */
+    f16x8_t         vecSrc;     /* current 8 samples, then their deviations from the mean */
+    f16x8_t         sumVec = vdupq_n_f16((float16_t) 0.0);  /* per-lane sums of squared deviations */
+    float16_t       fMean;
+
+    /* Fewer than two samples: report a variance of zero */
+    if (blockSize <= 1U) {
+        *pResult = 0;
+        return;
+    }
+
+
+    /* First pass: mean of the whole vector */
+    arm_mean_f16(pSrc, blockSize, &fMean);
+
+/*
+ * 6.14 bug: compiler workaround that re-zeroes the accumulator through
+ * inline assembly. It is only compiled when SDCOMP_xxx is defined AND the
+ * Arm Compiler version is >= 6100100.
+ * NOTE(review): SDCOMP_xxx looks like a placeholder that is never defined,
+ * which would leave the workaround disabled - confirm.
+ */
+#if defined(SDCOMP_xxx)
+#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100)
+    __asm volatile(
+        "   vmov.i32                     %[acc], #0 \n"
+        : [acc] "+t"(sumVec)
+        : 
+        : );
+#endif
+#endif
+
+    /* Second pass: 8 samples per iteration; the last iteration may be partial */
+    blkCnt = blockSize;
+    do {
+        /* Tail predicate built from the number of samples still to process */
+        mve_pred16_t    p = vctp16q(blkCnt);
+
+        /* Predicated load (zeroing variant) of up to 8 samples */
+        vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p);
+        /*
+         * Deviation from the mean, then accumulate its square.
+         * Both operations are predicated with p; presumably the inactive
+         * lanes of sumVec are left unchanged (merging semantics), so the
+         * uninitialized lanes of the subtraction never reach the sum -
+         * see the MVE intrinsics reference.
+         */
+        vecSrc = vsubq_m(vuninitializedq_f16(), vecSrc, fMean, p);
+        sumVec = vfmaq_m(sumVec, vecSrc, vecSrc, p);
+
+        blkCnt -= 8;
+        pSrc += 8;
+    }
+    while (blkCnt > 0);
+    
+    /* Variance: combined lane sums divided by (blockSize - 1) */
+    *pResult = vecAddAcrossF16Mve(sumVec) / (float16_t) (blockSize - 1.0f);
+}
+#else
+
+void arm_var_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult)
+{
+  /*
+   * Scalar two-pass variance:
+   *   pass 1 sums the samples to obtain the mean,
+   *   pass 2 sums the squared deviations from that mean,
+   * and the result is the second sum divided by (blockSize - 1).
+   * Both passes accumulate strictly in input order.
+   */
+  const float16_t *pIn = pSrc;   /* Read cursor, rewound between the passes */
+  _Float16 total = 0.0f;         /* Pass 1: sum of the samples */
+  _Float16 sqDevSum = 0.0f;      /* Pass 2: sum of squared deviations */
+  _Float16 mean, dev;
+  uint32_t count;                /* Iterations left in the active loop */
+
+  /* Fewer than two samples: report a variance of zero */
+  if (blockSize <= 1U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+  /* Pass 1, unrolled: four samples per iteration */
+  for (count = blockSize / 4U; count != 0U; count--)
+  {
+    total += pIn[0];
+    total += pIn[1];
+    total += pIn[2];
+    total += pIn[3];
+
+    pIn += 4;
+  }
+
+  /* Between 0 and 3 samples are left for the tail loop */
+  count = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the tail loop processes every sample */
+  count = blockSize;
+
+#endif /* defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+  /* Pass 1, tail: one sample per iteration */
+  for (; count != 0U; count--)
+  {
+    total += *pIn++;
+  }
+
+  /* Mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
+  mean = total / (float16_t) blockSize;
+
+  /* Rewind for the second pass */
+  pIn = pSrc;
+
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+  /* Pass 2, unrolled: four squared deviations per iteration */
+  for (count = blockSize / 4U; count != 0U; count--)
+  {
+    dev = pIn[0] - mean;
+    sqDevSum += dev * dev;
+
+    dev = pIn[1] - mean;
+    sqDevSum += dev * dev;
+
+    dev = pIn[2] - mean;
+    sqDevSum += dev * dev;
+
+    dev = pIn[3] - mean;
+    sqDevSum += dev * dev;
+
+    pIn += 4;
+  }
+
+  /* Between 0 and 3 samples are left for the tail loop */
+  count = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: the tail loop processes every sample */
+  count = blockSize;
+
+#endif /* defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+  /* Pass 2, tail: one squared deviation per iteration */
+  for (; count != 0U; count--)
+  {
+    dev = *pIn++ - mean;
+    sqDevSum += dev * dev;
+  }
+
+  /* Variance: sum of squared deviations divided by (blockSize - 1) */
+  *pResult = sqDevSum / (float16_t)(blockSize - 1.0f);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @} end of variance group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@ -329,6 +329,7 @@ set(TESTSRC16
  Source/Tests/BasicTestsF16.cpp
  Source/Tests/ComplexTestsF16.cpp
  Source/Tests/InterpolationTestsF16.cpp
+  Source/Tests/StatsTestsF16.cpp
  Source/Tests/FIRF16.cpp
  Source/Tests/BIQUADF16.cpp
  Source/Tests/MISCF16.cpp
--- a/Testing/Include/Tests/StatsTestsF16.h
+++ b/Testing/Include/Tests/StatsTestsF16.h
@ -0,0 +1,35 @@
+#include "Test.h"
+#include "Pattern.h"
+
+#include "dsp/statistics_functions_f16.h"
+
+// Test suite for the f16 statistics functions declared in
+// dsp/statistics_functions_f16.h. The individual test methods are pulled
+// in from StatsTestsF16_decl.h; setUp/tearDown are called with the test ID
+// and the pattern manager.
+class StatsTestsF16:public Client::Suite
+    {
+        public:
+            StatsTestsF16(Testing::testID_t id);
+            virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr);
+            virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
+        private:
+            // Declarations of the test methods (file not visible here)
+            #include "StatsTestsF16_decl.h"
+            
+            // Input pattern read by the tests (e.g. test_max_f16 / test_min_f16)
+            Client::Pattern<float16_t> inputA;
+            // NOTE(review): inputB and dims are not used by the tests visible
+            // in this chunk - confirm their role in the remaining tests.
+            Client::Pattern<float16_t> inputB;
+            Client::Pattern<int16_t> dims;
+
+            // Buffers receiving the computed value and the computed index
+            Client::LocalPattern<float16_t> output;
+            Client::LocalPattern<int16_t> index;
+            // NOTE(review): presumably scratch storage - confirm against its users
+            Client::LocalPattern<float16_t> tmp;
+
+            // Reference patterns are not loaded when we are in dump mode
+            Client::RefPattern<float16_t> ref;
+            // Expected indexes compared against by the max / min tests
+            Client::Pattern<int16_t> maxIndexes;
+            Client::Pattern<int16_t> minIndexes;
+
+            int nbPatterns;
+            int vecDim;
+
+            // Position of the expected entry inside ref and the index patterns
+            int refOffset;
+
+           
+
+    };
--- a/Testing/PatternGeneration/Stats.py
+++ b/Testing/PatternGeneration/Stats.py
@ -343,6 +343,7 @@ def generatePatterns():
    PARAMDIR = os.path.join("Parameters","DSP","Stats","Stats")
    
    configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32")
+    configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16")
    configf64=Tools.Config(PATTERNDIR,PARAMDIR,"f64")
    configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31")
    configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15")
@ -355,5 +356,7 @@ def generatePatterns():
    writeTests(configq15,1,15)
    writeTests(configq7,1,7)

+    nb=writeTests(configf16,1,16)
+
 if __name__ == '__main__':
  generatePatterns()
--- a/Testing/Patterns/DSP/Stats/StatsF16/Input1_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/Input1_f16.txt
@ -0,0 +1,602 @@
+H
+300
+// -0.181541
+0xb1cf
+// -0.169745
+0xb16f
+// 0.237867
+0x339d
+// 0.280697
+0x347e
+// 0.111340
+0x2f20
+// -0.281630
+0xb482
+// 0.426820
+0x36d4
+// -0.147220
+0xb0b6
+// -0.301289
+0xb4d2
+// 0.085008
+0x2d71
+// -0.096822
+0xae32
+// 0.118322
+0x2f93
+// 0.026424
+0x26c4
+// -0.105946
+0xaec8
+// 0.411642
+0x3696
+// 0.136448
+0x305e
+// -0.335485
+0xb55e
+// -0.202068
+0xb277
+// -0.361403
+0xb5c8
+// -0.478874
+0xb7a9
+// -0.554829
+0xb870
+// -0.725607
+0xb9ce
+// -0.397848
+0xb65e
+// 0.081577
+0x2d39
+// 0.376096
+0x3604
+// 0.619949
+0x38f6
+// 0.035635
+0x2890
+// -0.045637
+0xa9d7
+// -0.168639
+0xb165
+// 0.170281
+0x3173
+// -0.077000
+0xacee
+// 0.167346
+0x315b
+// -0.189246
+0xb20e
+// 0.327278
+0x353d
+// -0.125620
+0xb005
+// -0.302179
+0xb4d6
+// 0.324256
+0x3530
+// -0.115925
+0xaf6b
+// 0.023388
+0x25fd
+// 0.213505
+0x32d5
+// -0.249834
+0xb3ff
+// 0.230302
+0x335f
+// 0.052223
+0x2aaf
+// -0.118431
+0xaf94
+// 0.237972
+0x339d
+// 0.167228
+0x315a
+// 0.500285
+0x3801
+// 0.396042
+0x3656
+// 0.287551
+0x349a
+// 0.363907
+0x35d3
+// -0.285888
+0xb493
+// -0.298927
+0xb4c8
+// 0.384334
+0x3626
+// -0.214899
+0xb2e0
+// -0.256566
+0xb41b
+// 0.438213
+0x3703
+// -0.057463
+0xab5b
+// 0.099164
+0x2e59
+// -0.337406
+0xb566
+// -0.602355
+0xb8d2
+// -0.003762
+0x9bb4
+// -0.270555
+0xb454
+// -0.351693
+0xb5a1
+// 0.279530
+0x3479
+// -0.365779
+0xb5da
+// 0.133551
+0x3046
+// 0.059872
+0x2baa
+// -0.286309
+0xb495
+// 0.246533
+0x33e4
+// 0.171805
+0x317f
+// -0.341557
+0xb577
+// 0.024636
+0x264f
+// 0.191050
+0x321d
+// 0.093971
+0x2e04
+// -0.077312
+0xacf3
+// -0.628921
+0xb908
+// 0.057062
+0x2b4e
+// 0.004234
+0x1c56
+// 0.702554
+0x399f
+// 0.070054
+0x2c7c
+// -0.126315
+0xb00b
+// -0.114205
+0xaf4f
+// 0.174413
+0x3195
+// -0.323334
+0xb52c
+// -0.419826
+0xb6b8
+// -0.092743
+0xadf0
+// 0.023260
+0x25f4
+// 0.169239
+0x316a
+// -0.112398
+0xaf32
+// -0.530023
+0xb83d
+// 0.057968
+0x2b6b
+// 0.099520
+0x2e5f
+// 0.249493
+0x33fc
+// -0.158562
+0xb113
+// 0.442311
+0x3714
+// -0.375796
+0xb603
+// 0.262720
+0x3434
+// -0.068077
+0xac5b
+// -0.338289
+0xb56a
+// 0.158634
+0x3114
+// 0.292740
+0x34af
+// 0.255336
+0x3416
+// 0.163457
+0x313b
+// -0.642740
+0xb924
+// -0.303784
+0xb4dc
+// 0.290379
+0x34a5
+// -0.139732
+0xb079
+// -0.531862
+0xb841
+// -0.109192
+0xaefd
+// -0.051034
+0xaa88
+// -0.845953
+0xbac5
+// 0.370689
+0x35ee
+// -0.224557
+0xb330
+// 0.087760
+0x2d9e
+// 0.044607
+0x29b6
+// -0.176383
+0xb1a5
+// -0.054776
+0xab03
+// -0.340220
+0xb572
+// -0.437922
+0xb702
+// -0.336270
+0xb561
+// 0.267078
+0x3446
+// 0.069067
+0x2c6c
+// -0.058832
+0xab88
+// -0.331618
+0xb54e
+// -0.029930
+0xa7a9
+// 0.609125
+0x38df
+// -0.251916
+0xb408
+// -0.122801
+0xafdc
+// 0.387843
+0x3635
+// 0.112303
+0x2f30
+// -0.407489
+0xb685
+// 0.316717
+0x3511
+// -0.076934
+0xacec
+// 0.299806
+0x34cc
+// -0.123522
+0xafe8
+// 0.221139
+0x3314
+// -0.012030
+0xa229
+// -0.330032
+0xb548
+// 0.043511
+0x2992
+// -0.790171
+0xba52
+// 0.119824
+0x2fab
+// -0.009153
+0xa0b0
+// 0.212885
+0x32d0
+// 0.430953
+0x36e5
+// 0.169799
+0x316f
+// -0.264303
+0xb43b
+// -0.075679
+0xacd8
+// -0.062243
+0xabf8
+// -0.322429
+0xb529
+// -0.229378
+0xb357
+// 0.020636
+0x2548
+// 0.474186
+0x3796
+// -0.149192
+0xb0c6
+// 0.159253
+0x3119
+// -0.418241
+0xb6b1
+// -0.220744
+0xb310
+// -0.332685
+0xb553
+// 0.078293
+0x2d03
+// 0.284063
+0x348c
+// 0.109206
+0x2efd
+// 0.069842
+0x2c78
+// -0.054476
+0xaaf9
+// -0.018401
+0xa4b6
+// 0.077888
+0x2cfc
+// 0.219216
+0x3304
+// 0.174853
+0x3198
+// 0.218507
+0x32fe
+// 0.208197
+0x32aa
+// -0.057710
+0xab63
+// -0.231653
+0xb36a
+// -0.213055
+0xb2d1
+// -0.174800
+0xb198
+// -0.099992
+0xae66
+// 0.081162
+0x2d32
+// -0.150392
+0xb0d0
+// -0.448191
+0xb72c
+// 0.200164
+0x3268
+// 0.020702
+0x254d
+// -0.249379
+0xb3fb
+// 0.079454
+0x2d16
+// 0.215899
+0x32e9
+// 0.415424
+0x36a6
+// -0.369649
+0xb5ea
+// 0.007064
+0x1f3c
+// -0.536642
+0xb84b
+// -0.106107
+0xaeca
+// -0.074443
+0xacc4
+// 0.087497
+0x2d9a
+// 0.543513
+0x3859
+// -0.090931
+0xadd2
+// -0.197363
+0xb251
+// -0.452118
+0xb73c
+// 0.139166
+0x3074
+// 0.054687
+0x2b00
+// -0.274367
+0xb464
+// -0.153652
+0xb0eb
+// -0.458440
+0xb756
+// -0.159573
+0xb11b
+// -0.014334
+0xa357
+// -0.138766
+0xb071
+// -0.285817
+0xb493
+// 0.166074
+0x3150
+// -0.007518
+0x9fb3
+// -0.287886
+0xb49b
+// 0.040596
+0x2932
+// 0.024333
+0x263b
+// -0.711750
+0xb9b2
+// -0.111767
+0xaf27
+// 0.123774
+0x2fec
+// -0.405266
+0xb67c
+// -0.291294
+0xb4a9
+// 0.165725
+0x314e
+// -0.390727
+0xb640
+// 0.037912
+0x28da
+// 0.067118
+0x2c4c
+// -0.020696
+0xa54c
+// 0.469622
+0x3784
+// -0.268572
+0xb44c
+// -0.365714
+0xb5da
+// 0.359382
+0x35c0
+// -0.351236
+0xb59f
+// 0.003563
+0x1b4c
+// -0.055833
+0xab26
+// 0.185036
+0x31ec
+// 0.220537
+0x330f
+// 0.142509
+0x308f
+// -0.131898
+0xb039
+// -0.040169
+0xa924
+// 0.340507
+0x3573
+// -0.443301
+0xb718
+// -0.504501
+0xb809
+// -0.143797
+0xb09a
+// -0.224534
+0xb32f
+// 0.413910
+0x369f
+// -0.304571
+0xb4e0
+// -0.546099
+0xb85e
+// 0.298548
+0x34c7
+// -0.075447
+0xacd4
+// -0.397018
+0xb65a
+// -0.706257
+0xb9a6
+// -0.326503
+0xb539
+// 0.027824
+0x271f
+// -0.067521
+0xac52
+// 0.132370
+0x303c
+// -0.119409
+0xafa4
+// 0.417147
+0x36ad
+// -0.053274
+0xaad2
+// 0.215552
+0x32e6
+// 0.555113
+0x3871
+// 0.132507
+0x303d
+// -0.112059
+0xaf2c
+// -0.438044
+0xb702
+// -0.061994
+0xabef
+// 0.420391
+0x36ba
+// -0.511288
+0xb817
+// -0.169411
+0xb16c
+// 0.455896
+0x374b
+// -0.037098
+0xa8c0
+// 0.097329
+0x2e3b
+// -0.510351
+0xb815
+// 0.031878
+0x2815
+// 0.451849
+0x373b
+// -0.005921
+0x9e10
+// 0.522571
+0x382e
+// -0.795109
+0xba5c
+// -0.063013
+0xac08
+// -0.395304
+0xb653
+// 0.044986
+0x29c2
+// 0.439022
+0x3706
+// 0.151798
+0x30dc
+// 1.000000
+0x3c00
+// 0.151786
+0x30db
+// 0.096127
+0x2e27
+// -0.079173
+0xad11
+// -0.182049
+0xb1d3
+// 0.381454
+0x361a
+// 0.370677
+0x35ee
+// -0.187572
+0xb201
+// 0.183005
+0x31db
+// -0.588512
+0xb8b5
+// -0.118559
+0xaf96
+// 0.200334
+0x3269
+// 0.120477
+0x2fb6
+// 0.423841
+0x36c8
+// -0.482627
+0xb7b9
+// 0.069639
+0x2c75
+// 0.179386
+0x31be
+// -0.066133
+0xac3c
+// 0.042963
+0x2980
+// -0.106336
+0xaece
+// -0.125617
+0xb005
+// -0.464414
+0xb76e
+// 0.132390
+0x303d
+// -0.202302
+0xb279
+// -0.703952
+0xb9a2
+// -0.128242
+0xb01b
+// 0.437213
+0x36ff
+// 0.160378
+0x3122
+// -0.112012
+0xaf2b
+// -0.038690
+0xa8f4
--- a/Testing/Patterns/DSP/Stats/StatsF16/Input2_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/Input2_f16.txt
@ -0,0 +1,602 @@
+H
+300
+// 0.181541
+0x31cf
+// 0.169745
+0x316f
+// 0.237867
+0x339d
+// 0.280697
+0x347e
+// 0.111340
+0x2f20
+// 0.281630
+0x3482
+// 0.426820
+0x36d4
+// 0.147220
+0x30b6
+// 0.301289
+0x34d2
+// 0.085008
+0x2d71
+// 0.096822
+0x2e32
+// 0.118322
+0x2f93
+// 0.026424
+0x26c4
+// 0.105946
+0x2ec8
+// 0.411642
+0x3696
+// 0.136448
+0x305e
+// 0.335485
+0x355e
+// 0.202068
+0x3277
+// 0.361403
+0x35c8
+// 0.478874
+0x37a9
+// 0.554829
+0x3870
+// 0.725607
+0x39ce
+// 0.397848
+0x365e
+// 0.081577
+0x2d39
+// 0.376096
+0x3604
+// 0.619949
+0x38f6
+// 0.035635
+0x2890
+// 0.045637
+0x29d7
+// 0.168639
+0x3165
+// 0.170281
+0x3173
+// 0.077000
+0x2cee
+// 0.167346
+0x315b
+// 0.189246
+0x320e
+// 0.327278
+0x353d
+// 0.125620
+0x3005
+// 0.302179
+0x34d6
+// 0.324256
+0x3530
+// 0.115925
+0x2f6b
+// 0.023388
+0x25fd
+// 0.213505
+0x32d5
+// 0.249834
+0x33ff
+// 0.230302
+0x335f
+// 0.052223
+0x2aaf
+// 0.118431
+0x2f94
+// 0.237972
+0x339d
+// 0.167228
+0x315a
+// 0.500285
+0x3801
+// 0.396042
+0x3656
+// 0.287551
+0x349a
+// 0.363907
+0x35d3
+// 0.285888
+0x3493
+// 0.298927
+0x34c8
+// 0.384334
+0x3626
+// 0.214899
+0x32e0
+// 0.256566
+0x341b
+// 0.438213
+0x3703
+// 0.057463
+0x2b5b
+// 0.099164
+0x2e59
+// 0.337406
+0x3566
+// 0.602355
+0x38d2
+// 0.003762
+0x1bb4
+// 0.270555
+0x3454
+// 0.351693
+0x35a1
+// 0.279530
+0x3479
+// 0.365779
+0x35da
+// 0.133551
+0x3046
+// 0.059872
+0x2baa
+// 0.286309
+0x3495
+// 0.246533
+0x33e4
+// 0.171805
+0x317f
+// 0.341557
+0x3577
+// 0.024636
+0x264f
+// 0.191050
+0x321d
+// 0.093971
+0x2e04
+// 0.077312
+0x2cf3
+// 0.628921
+0x3908
+// 0.057062
+0x2b4e
+// 0.004234
+0x1c56
+// 0.702554
+0x399f
+// 0.070054
+0x2c7c
+// 0.126315
+0x300b
+// 0.114205
+0x2f4f
+// 0.174413
+0x3195
+// 0.323334
+0x352c
+// 0.419826
+0x36b8
+// 0.092743
+0x2df0
+// 0.023260
+0x25f4
+// 0.169239
+0x316a
+// 0.112398
+0x2f32
+// 0.530023
+0x383d
+// 0.057968
+0x2b6b
+// 0.099520
+0x2e5f
+// 0.249493
+0x33fc
+// 0.158562
+0x3113
+// 0.442311
+0x3714
+// 0.375796
+0x3603
+// 0.262720
+0x3434
+// 0.068077
+0x2c5b
+// 0.338289
+0x356a
+// 0.158634
+0x3114
+// 0.292740
+0x34af
+// 0.255336
+0x3416
+// 0.163457
+0x313b
+// 0.642740
+0x3924
+// 0.303784
+0x34dc
+// 0.290379
+0x34a5
+// 0.139732
+0x3079
+// 0.531862
+0x3841
+// 0.109192
+0x2efd
+// 0.051034
+0x2a88
+// 0.845953
+0x3ac5
+// 0.370689
+0x35ee
+// 0.224557
+0x3330
+// 0.087760
+0x2d9e
+// 0.044607
+0x29b6
+// 0.176383
+0x31a5
+// 0.054776
+0x2b03
+// 0.340220
+0x3572
+// 0.437922
+0x3702
+// 0.336270
+0x3561
+// 0.267078
+0x3446
+// 0.069067
+0x2c6c
+// 0.058832
+0x2b88
+// 0.331618
+0x354e
+// 0.029930
+0x27a9
+// 0.609125
+0x38df
+// 0.251916
+0x3408
+// 0.122801
+0x2fdc
+// 0.387843
+0x3635
+// 0.112303
+0x2f30
+// 0.407489
+0x3685
+// 0.316717
+0x3511
+// 0.076934
+0x2cec
+// 0.299806
+0x34cc
+// 0.123522
+0x2fe8
+// 0.221139
+0x3314
+// 0.012030
+0x2229
+// 0.330032
+0x3548
+// 0.043511
+0x2992
+// 0.790171
+0x3a52
+// 0.119824
+0x2fab
+// 0.009153
+0x20b0
+// 0.212885
+0x32d0
+// 0.430953
+0x36e5
+// 0.169799
+0x316f
+// 0.264303
+0x343b
+// 0.075679
+0x2cd8
+// 0.062243
+0x2bf8
+// 0.322429
+0x3529
+// 0.229378
+0x3357
+// 0.020636
+0x2548
+// 0.474186
+0x3796
+// 0.149192
+0x30c6
+// 0.159253
+0x3119
+// 0.418241
+0x36b1
+// 0.220744
+0x3310
+// 0.332685
+0x3553
+// 0.078293
+0x2d03
+// 0.284063
+0x348c
+// 0.109206
+0x2efd
+// 0.069842
+0x2c78
+// 0.054476
+0x2af9
+// 0.018401
+0x24b6
+// 0.077888
+0x2cfc
+// 0.219216
+0x3304
+// 0.174853
+0x3198
+// 0.218507
+0x32fe
+// 0.208197
+0x32aa
+// 0.057710
+0x2b63
+// 0.231653
+0x336a
+// 0.213055
+0x32d1
+// 0.174800
+0x3198
+// 0.099992
+0x2e66
+// 0.081162
+0x2d32
+// 0.150392
+0x30d0
+// 0.448191
+0x372c
+// 0.200164
+0x3268
+// 0.020702
+0x254d
+// 0.249379
+0x33fb
+// 0.079454
+0x2d16
+// 0.215899
+0x32e9
+// 0.415424
+0x36a6
+// 0.369649
+0x35ea
+// 0.007064
+0x1f3c
+// 0.536642
+0x384b
+// 0.106107
+0x2eca
+// 0.074443
+0x2cc4
+// 0.087497
+0x2d9a
+// 0.543513
+0x3859
+// 0.090931
+0x2dd2
+// 0.197363
+0x3251
+// 0.452118
+0x373c
+// 0.139166
+0x3074
+// 0.054687
+0x2b00
+// 0.274367
+0x3464
+// 0.153652
+0x30eb
+// 0.458440
+0x3756
+// 0.159573
+0x311b
+// 0.014334
+0x2357
+// 0.138766
+0x3071
+// 0.285817
+0x3493
+// 0.166074
+0x3150
+// 0.007518
+0x1fb3
+// 0.287886
+0x349b
+// 0.040596
+0x2932
+// 0.024333
+0x263b
+// 0.711750
+0x39b2
+// 0.111767
+0x2f27
+// 0.123774
+0x2fec
+// 0.405266
+0x367c
+// 0.291294
+0x34a9
+// 0.165725
+0x314e
+// 0.390727
+0x3640
+// 0.037912
+0x28da
+// 0.067118
+0x2c4c
+// 0.020696
+0x254c
+// 0.469622
+0x3784
+// 0.268572
+0x344c
+// 0.365714
+0x35da
+// 0.359382
+0x35c0
+// 0.351236
+0x359f
+// 0.003563
+0x1b4c
+// 0.055833
+0x2b26
+// 0.185036
+0x31ec
+// 0.220537
+0x330f
+// 0.142509
+0x308f
+// 0.131898
+0x3039
+// 0.040169
+0x2924
+// 0.340507
+0x3573
+// 0.443301
+0x3718
+// 0.504501
+0x3809
+// 0.143797
+0x309a
+// 0.224534
+0x332f
+// 0.413910
+0x369f
+// 0.304571
+0x34e0
+// 0.546099
+0x385e
+// 0.298548
+0x34c7
+// 0.075447
+0x2cd4
+// 0.397018
+0x365a
+// 0.706257
+0x39a6
+// 0.326503
+0x3539
+// 0.027824
+0x271f
+// 0.067521
+0x2c52
+// 0.132370
+0x303c
+// 0.119409
+0x2fa4
+// 0.417147
+0x36ad
+// 0.053274
+0x2ad2
+// 0.215552
+0x32e6
+// 0.555113
+0x3871
+// 0.132507
+0x303d
+// 0.112059
+0x2f2c
+// 0.438044
+0x3702
+// 0.061994
+0x2bef
+// 0.420391
+0x36ba
+// 0.511288
+0x3817
+// 0.169411
+0x316c
+// 0.455896
+0x374b
+// 0.037098
+0x28c0
+// 0.097329
+0x2e3b
+// 0.510351
+0x3815
+// 0.031878
+0x2815
+// 0.451849
+0x373b
+// 0.005921
+0x1e10
+// 0.522571
+0x382e
+// 0.795109
+0x3a5c
+// 0.063013
+0x2c08
+// 0.395304
+0x3653
+// 0.044986
+0x29c2
+// 0.439022
+0x3706
+// 0.151798
+0x30dc
+// 1.000000
+0x3c00
+// 0.151786
+0x30db
+// 0.096127
+0x2e27
+// 0.079173
+0x2d11
+// 0.182049
+0x31d3
+// 0.381454
+0x361a
+// 0.370677
+0x35ee
+// 0.187572
+0x3201
+// 0.183005
+0x31db
+// 0.588512
+0x38b5
+// 0.118559
+0x2f96
+// 0.200334
+0x3269
+// 0.120477
+0x2fb6
+// 0.423841
+0x36c8
+// 0.482627
+0x37b9
+// 0.069639
+0x2c75
+// 0.179386
+0x31be
+// 0.066133
+0x2c3c
+// 0.042963
+0x2980
+// 0.106336
+0x2ece
+// 0.125617
+0x3005
+// 0.464414
+0x376e
+// 0.132390
+0x303d
+// 0.202302
+0x3279
+// 0.703952
+0x39a2
+// 0.128242
+0x301b
+// 0.437213
+0x36ff
+// 0.160378
+0x3122
+// 0.112012
+0x2f2b
+// 0.038690
+0x28f4
--- a/Testing/Patterns/DSP/Stats/StatsF16/MaxIndexes1_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/MaxIndexes1_s16.txt
@ -0,0 +1,8 @@
+H
+3
+// 6
+0x0006
+// 6
+0x0006
+// 6
+0x0006
--- a/Testing/Patterns/DSP/Stats/StatsF16/MaxVals1_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/MaxVals1_f16.txt
@ -0,0 +1,8 @@
+H
+3
+// 0.426820
+0x36d4
+// 0.426820
+0x36d4
+// 0.426820
+0x36d4
--- a/Testing/Patterns/DSP/Stats/StatsF16/MeanVals2_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/MeanVals2_f16.txt
@ -0,0 +1,10 @@
+H
+4
+// 0.241377
+0x33b9
+// 0.194923
+0x323d
+// 0.268473
+0x344c
+// 0.237753
+0x339c
--- a/Testing/Patterns/DSP/Stats/StatsF16/MinIndexes3_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/MinIndexes3_s16.txt
@ -0,0 +1,8 @@
+H
+3
+// 5
+0x0005
+// 8
+0x0008
+// 21
+0x0015
--- a/Testing/Patterns/DSP/Stats/StatsF16/MinVals3_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/MinVals3_f16.txt
@ -0,0 +1,8 @@
+H
+3
+// -0.281630
+0xb482
+// -0.301289
+0xb4d2
+// -0.725607
+0xb9ce
--- a/Testing/Patterns/DSP/Stats/StatsF16/PowerVals4_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/PowerVals4_f16.txt
@ -0,0 +1,8 @@
+H
+3
+// 0.471030
+0x3789
+// 0.814069
+0x3a83
+// 2.320008
+0x40a4
--- a/Testing/Patterns/DSP/Stats/StatsF16/RmsVals5_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/RmsVals5_f16.txt
@ -0,0 +1,10 @@
+H
+4
+// 0.259403
+0x3427
+// 0.225564
+0x3338
+// 0.317600
+0x3515
+// 0.288172
+0x349c
--- a/Testing/Patterns/DSP/Stats/StatsF16/StdVals6_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/StdVals6_f16.txt
@ -0,0 +1,10 @@
+H
+4
+// 0.272449
+0x345c
+// 0.230237
+0x335e
+// 0.305035
+0x34e1
+// 0.289078
+0x34a0
--- a/Testing/Patterns/DSP/Stats/StatsF16/VarVals7_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/VarVals7_f16.txt
@ -0,0 +1,10 @@
+H
+4
+// 0.074228
+0x2cc0
+// 0.053009
+0x2ac9
+// 0.093046
+0x2df4
+// 0.083566
+0x2d59
--- a/Testing/Source/Tests/StatsTestsF16.cpp
+++ b/Testing/Source/Tests/StatsTestsF16.cpp
@ -0,0 +1,749 @@
+#include "StatsTestsF16.h"
+#include <stdio.h>
+#include "Error.h"
+#include "Test.h"
+
+
+#define SNR_THRESHOLD 50 /* Threshold handed to ASSERT_SNR by the tests of this file. */
+/* 
+
+Reference patterns are generated with
+a double precision computation.
+
+REL_ERROR is the tolerance handed to ASSERT_REL_ERROR when a float16
+result is compared with one of those references.
+
+*/
+#define REL_ERROR (3.0e-3)
+
+    /*
+
+    Run arm_max_f16 on the samples loaded in inputA.
+
+    The maximum and its position are stored in the output patterns and
+    must both be equal to the reference entries selected by refOffset.
+
+    */
+    void StatsTestsF16::test_max_f16()
+    {
+        const float16_t *samples     = inputA.ptr();
+        float16_t       *expectedMax = ref.ptr();
+        int16_t         *expectedPos = maxIndexes.ptr();
+        float16_t       *valueOut    = output.ptr();
+        int16_t         *indexOut    = index.ptr();
+
+        float16_t maxValue;
+        uint32_t  maxPos;
+
+        arm_max_f16(samples, inputA.nbSamples(), &maxValue, &maxPos);
+
+        /* Record what was computed before checking it. */
+        valueOut[0] = maxValue;
+        indexOut[0] = maxPos;
+
+        ASSERT_EQ(maxValue,expectedMax[this->refOffset]);
+        ASSERT_EQ((int16_t)maxPos,expectedPos[this->refOffset]);
+
+    }
+
+#if 0 /* Compiled out - presumably until arm_max_no_idx_f16 exists in
+         float16 (TODO confirm). The body runs arm_max_no_idx_f16 on
+         inputA, stores the value in output and compares it with
+         ref[refOffset] using ASSERT_EQ. */
+    void StatsTestsF16::test_max_no_idx_f16()
+    {
+        const float16_t *inp  = inputA.ptr();
+
+        float16_t result;
+
+        float16_t *refp  = ref.ptr();
+
+        float16_t *outp  = output.ptr();
+
+        arm_max_no_idx_f16(inp,
+              inputA.nbSamples(),
+              &result);
+
+        outp[0] = result;
+
+        ASSERT_EQ(result,refp[this->refOffset]);
+
+    }
+#endif 
+
+    /*
+
+    Run arm_min_f16 on the samples loaded in inputA.
+
+    The minimum and its position are stored in the output patterns and
+    must both be equal to the reference entries selected by refOffset.
+
+    */
+    void StatsTestsF16::test_min_f16()
+    {
+        const float16_t *samples     = inputA.ptr();
+        float16_t       *expectedMin = ref.ptr();
+        int16_t         *expectedPos = minIndexes.ptr();
+        float16_t       *valueOut    = output.ptr();
+        int16_t         *indexOut    = index.ptr();
+
+        float16_t minValue;
+        uint32_t  minPos;
+
+        arm_min_f16(samples, inputA.nbSamples(), &minValue, &minPos);
+
+        /* Record what was computed before checking it. */
+        valueOut[0] = minValue;
+        indexOut[0] = minPos;
+
+        ASSERT_EQ(minValue,expectedMin[this->refOffset]);
+        ASSERT_EQ((int16_t)minPos,expectedPos[this->refOffset]);
+
+    }
+
+    /*
+
+    Run arm_mean_f16 on the samples loaded in inputA, store the mean in
+    the output pattern and compare it with the reference entry selected
+    by refOffset (SNR check followed by relative error check).
+
+    */
+    void StatsTestsF16::test_mean_f16()
+    {
+        const float16_t *samples  = inputA.ptr();
+        float16_t       *expected = ref.ptr();
+        float16_t       *valueOut = output.ptr();
+
+        float16_t mean;
+
+        arm_mean_f16(samples, inputA.nbSamples(), &mean);
+
+        valueOut[0] = mean;
+
+        ASSERT_SNR(mean,expected[this->refOffset],(float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(mean,expected[this->refOffset],REL_ERROR);
+
+    }
+
+    /*
+
+    Run arm_power_f16 on the samples loaded in inputA, store the result
+    in the output pattern and compare it with the reference entry selected
+    by refOffset (SNR check followed by relative error check).
+
+    */
+    void StatsTestsF16::test_power_f16()
+    {
+        const float16_t *samples  = inputA.ptr();
+        float16_t       *expected = ref.ptr();
+        float16_t       *valueOut = output.ptr();
+
+        float16_t power;
+
+        arm_power_f16(samples, inputA.nbSamples(), &power);
+
+        valueOut[0] = power;
+
+        ASSERT_SNR(power,expected[this->refOffset],(float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(power,expected[this->refOffset],REL_ERROR);
+
+    }
+
+    /*
+
+    Run arm_rms_f16 on the samples loaded in inputA, store the result
+    in the output pattern and compare it with the reference entry selected
+    by refOffset (SNR check followed by relative error check).
+
+    */
+    void StatsTestsF16::test_rms_f16()
+    {
+        const float16_t *samples  = inputA.ptr();
+        float16_t       *expected = ref.ptr();
+        float16_t       *valueOut = output.ptr();
+
+        float16_t rms;
+
+        arm_rms_f16(samples, inputA.nbSamples(), &rms);
+
+        valueOut[0] = rms;
+
+        ASSERT_SNR(rms,expected[this->refOffset],(float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(rms,expected[this->refOffset],REL_ERROR);
+
+    }
+
+    /*
+
+    Run arm_std_f16 on the samples loaded in inputA, store the result
+    in the output pattern and compare it with the reference entry selected
+    by refOffset (SNR check followed by relative error check).
+
+    */
+    void StatsTestsF16::test_std_f16()
+    {
+        const float16_t *samples  = inputA.ptr();
+        float16_t       *expected = ref.ptr();
+        float16_t       *valueOut = output.ptr();
+
+        float16_t stdDev;
+
+        arm_std_f16(samples, inputA.nbSamples(), &stdDev);
+
+        valueOut[0] = stdDev;
+
+        ASSERT_SNR(stdDev,expected[this->refOffset],(float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(stdDev,expected[this->refOffset],REL_ERROR);
+
+    }
+
+    /*
+
+    Run arm_var_f16 on the samples loaded in inputA, store the result
+    in the output pattern and compare it with the reference entry selected
+    by refOffset (SNR check followed by relative error check).
+
+    */
+    void StatsTestsF16::test_var_f16()
+    {
+        const float16_t *samples  = inputA.ptr();
+        float16_t       *expected = ref.ptr();
+        float16_t       *valueOut = output.ptr();
+
+        float16_t variance;
+
+        arm_var_f16(samples, inputA.nbSamples(), &variance);
+
+        valueOut[0] = variance;
+
+        ASSERT_SNR(variance,expected[this->refOffset],(float16_t)SNR_THRESHOLD);
+        ASSERT_REL_ERROR(variance,expected[this->refOffset],REL_ERROR);
+
+    }
+
+
+    /*
+
+    Numerical stability check for arm_std_f16.
+
+    The four samples below are shifted by a large offset so that their
+    average is much bigger than their standard deviation. The textbook
+    variance formula gives a negative variance on such data; the CMSIS-DSP
+    variance is the two pass one and is expected to cope with it.
+
+    A negative variance would make the square root return zero, so the
+    test checks that the result stays close to the standard deviation of
+    the samples before the offset was added.
+
+    */
+    void StatsTestsF16::test_std_stability_f16()
+    {
+      float16_t samples[4]={4.0f, 7.0f, 13.0f, 16.0f};
+      float16_t stdDev;
+
+      /* Make the average dominate the spread of the samples. */
+      for(int k=0 ; k < 4; k++)
+      {
+        samples[k] += 3.0e3f;
+      }
+
+      arm_std_f16(samples,4,&stdDev);
+
+      /* A result of zero (negative variance) is far outside the tolerance. */
+      ASSERT_TRUE(fabs(5.47723f - stdDev) < 0.32f);
+
+    }
+
+#if 0 /* Compiled out - presumably until the float16 versions of these
+         functions exist (TODO confirm). Each test below walks nbPatterns
+         vectors stored back to back in inputA (and inputB when there are
+         two inputs); the length of vector i is read from dims[i+1]. One
+         result per vector is written to output, which is then compared
+         with ref (SNR check followed by relative error check). */
+    void StatsTestsF16::test_entropy_f16()
+    {
+      const float16_t *inp  = inputA.ptr();
+      const int16_t *dimsp  = dims.ptr();
+
+      float16_t *outp         = output.ptr();
+
+      for(int i=0;i < this->nbPatterns; i++)
+      {
+         *outp = arm_entropy_f16(inp,dimsp[i+1]);
+         outp++;
+         inp += dimsp[i+1];
+      }
+
+      ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
+
+      ASSERT_REL_ERROR(ref,output,REL_ERROR);
+
+    } 
+
+    void StatsTestsF16::test_logsumexp_f16()
+    {
+      const float16_t *inp  = inputA.ptr();
+      const int16_t *dimsp  = dims.ptr();
+
+      float16_t *outp         = output.ptr();
+
+      for(int i=0;i < this->nbPatterns; i++)
+      {
+         *outp = arm_logsumexp_f16(inp,dimsp[i+1]);
+         outp++;
+         inp += dimsp[i+1];
+      }
+
+      ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
+
+      ASSERT_REL_ERROR(ref,output,REL_ERROR);
+    } 
+
+
+    void StatsTestsF16::test_kullback_leibler_f16()
+    {
+      const float16_t *inpA  = inputA.ptr();
+      const float16_t *inpB  = inputB.ptr();
+      const int16_t *dimsp  = dims.ptr();
+
+      float16_t *outp         = output.ptr();
+
+      for(int i=0;i < this->nbPatterns; i++)
+      {
+         *outp = arm_kullback_leibler_f16(inpA,inpB,dimsp[i+1]);
+         outp++;
+         inpA += dimsp[i+1];
+         inpB += dimsp[i+1];
+      }
+
+      ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
+
+      ASSERT_REL_ERROR(ref,output,REL_ERROR);
+    } 
+
+    void StatsTestsF16::test_logsumexp_dot_prod_f16()
+    {
+      const float16_t *inpA  = inputA.ptr();
+      const float16_t *inpB  = inputB.ptr();
+      const int16_t *dimsp  = dims.ptr();
+
+      float16_t *outp         = output.ptr();
+      float16_t *tmpp         = tmp.ptr();
+
+      for(int i=0;i < this->nbPatterns; i++)
+      {
+         *outp = arm_logsumexp_dot_prod_f16(inpA,inpB,dimsp[i+1],tmpp);
+         outp++;
+         inpA += dimsp[i+1];
+         inpB += dimsp[i+1];
+      }
+
+      ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
+
+      ASSERT_REL_ERROR(ref,output,REL_ERROR);
+    } 
+
+  #endif
+  
+    /*
+
+    Load the patterns needed by the test identified by id.
+
+    Every enabled case reloads inputA with the sample count of the test
+    (the third argument of reload presumably limits the number of samples
+    which are loaded - confirm in the pattern API), reloads the reference
+    pattern, creates a one sample output (plus a one sample index output
+    for the max and min tests) and sets refOffset to the position of the
+    expected value inside the reference pattern.
+
+    The sample counts are 7, 16 and 23 for the short tests and 100 for the
+    long ones. The reference patterns contain one entry per sample count,
+    in that order.
+
+    paramsArgs is not used.
+
+    */
+    void StatsTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
+    {
+        (void)paramsArgs;
+        switch(id)
+        {
+            /* arm_max_f16 */
+            case StatsTestsF16::TEST_MAX_F16_1:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,7);
+
+               maxIndexes.reload(StatsTestsF16::MAXINDEXES_S16_ID,mgr);
+               ref.reload(StatsTestsF16::MAXVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+               index.create(1,StatsTestsF16::OUT_S16_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MAX_F16_2:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,16);
+
+               maxIndexes.reload(StatsTestsF16::MAXINDEXES_S16_ID,mgr);
+               ref.reload(StatsTestsF16::MAXVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+               index.create(1,StatsTestsF16::OUT_S16_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MAX_F16_3:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,23);
+
+               maxIndexes.reload(StatsTestsF16::MAXINDEXES_S16_ID,mgr);
+               ref.reload(StatsTestsF16::MAXVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+               index.create(1,StatsTestsF16::OUT_S16_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            /* arm_mean_f16 */
+            case StatsTestsF16::TEST_MEAN_F16_4:
+            {
+               inputA.reload(StatsTestsF16::INPUT2_F16_ID,mgr,7);
+
+               ref.reload(StatsTestsF16::MEANVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MEAN_F16_5:
+            {
+               inputA.reload(StatsTestsF16::INPUT2_F16_ID,mgr,16);
+
+               ref.reload(StatsTestsF16::MEANVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MEAN_F16_6:
+            {
+               inputA.reload(StatsTestsF16::INPUT2_F16_ID,mgr,23);
+
+               ref.reload(StatsTestsF16::MEANVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            /* arm_min_f16 */
+            case StatsTestsF16::TEST_MIN_F16_7:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,7);
+
+               minIndexes.reload(StatsTestsF16::MININDEXES_S16_ID,mgr);
+               ref.reload(StatsTestsF16::MINVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+               index.create(1,StatsTestsF16::OUT_S16_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MIN_F16_8:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,16);
+
+               minIndexes.reload(StatsTestsF16::MININDEXES_S16_ID,mgr);
+               ref.reload(StatsTestsF16::MINVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+               index.create(1,StatsTestsF16::OUT_S16_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MIN_F16_9:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,23);
+
+               minIndexes.reload(StatsTestsF16::MININDEXES_S16_ID,mgr);
+               ref.reload(StatsTestsF16::MINVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+               index.create(1,StatsTestsF16::OUT_S16_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            /* arm_power_f16 */
+            case StatsTestsF16::TEST_POWER_F16_10:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,7);
+
+               ref.reload(StatsTestsF16::POWERVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF16::TEST_POWER_F16_11:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,16);
+
+               ref.reload(StatsTestsF16::POWERVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF16::TEST_POWER_F16_12:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,23);
+
+               ref.reload(StatsTestsF16::POWERVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            /* arm_rms_f16 */
+            case StatsTestsF16::TEST_RMS_F16_13:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,7);
+
+               ref.reload(StatsTestsF16::RMSVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF16::TEST_RMS_F16_14:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,16);
+
+               ref.reload(StatsTestsF16::RMSVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF16::TEST_RMS_F16_15:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,23);
+
+               ref.reload(StatsTestsF16::RMSVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            /* arm_std_f16 */
+            case StatsTestsF16::TEST_STD_F16_16:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,7);
+
+               ref.reload(StatsTestsF16::STDVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF16::TEST_STD_F16_17:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,16);
+
+               ref.reload(StatsTestsF16::STDVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF16::TEST_STD_F16_18:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,23);
+
+               ref.reload(StatsTestsF16::STDVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            /* arm_var_f16 */
+            case StatsTestsF16::TEST_VAR_F16_19:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,7);
+
+               ref.reload(StatsTestsF16::VARVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF16::TEST_VAR_F16_20:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,16);
+
+               ref.reload(StatsTestsF16::VARVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF16::TEST_VAR_F16_21:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,23);
+
+               ref.reload(StatsTestsF16::VARVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+#if 0
+            /* Disabled together with the corresponding test functions. */
+            case StatsTestsF16::TEST_ENTROPY_F16_22:
+            {
+               inputA.reload(StatsTestsF16::INPUT22_F16_ID,mgr);
+               dims.reload(StatsTestsF16::DIM22_S16_ID,mgr);
+               ref.reload(StatsTestsF16::REF22_ENTROPY_F16_ID,mgr);
+               output.create(ref.nbSamples(),StatsTestsF16::OUT_F16_ID,mgr);
+
+               const int16_t *dimsp  = dims.ptr();
+               this->nbPatterns=dimsp[0];
+            }
+            break;
+
+            case StatsTestsF16::TEST_LOGSUMEXP_F16_23:
+            {
+               inputA.reload(StatsTestsF16::INPUT23_F16_ID,mgr);
+               dims.reload(StatsTestsF16::DIM23_S16_ID,mgr);
+               ref.reload(StatsTestsF16::REF23_LOGSUMEXP_F16_ID,mgr);
+               output.create(ref.nbSamples(),StatsTestsF16::OUT_F16_ID,mgr);
+
+               const int16_t *dimsp  = dims.ptr();
+               this->nbPatterns=dimsp[0];
+            }
+            break;
+
+            case StatsTestsF16::TEST_KULLBACK_LEIBLER_F16_24:
+            {
+               inputA.reload(StatsTestsF16::INPUTA24_F16_ID,mgr);
+               inputB.reload(StatsTestsF16::INPUTB24_F16_ID,mgr);
+               dims.reload(StatsTestsF16::DIM24_S16_ID,mgr);
+               ref.reload(StatsTestsF16::REF24_KL_F16_ID,mgr);
+               output.create(ref.nbSamples(),StatsTestsF16::OUT_F16_ID,mgr);
+
+               const int16_t *dimsp  = dims.ptr();
+               this->nbPatterns=dimsp[0];
+            }
+            break;
+
+            case StatsTestsF16::TEST_LOGSUMEXP_DOT_PROD_F16_25:
+            {
+               inputA.reload(StatsTestsF16::INPUTA25_F16_ID,mgr);
+               inputB.reload(StatsTestsF16::INPUTB25_F16_ID,mgr);
+               dims.reload(StatsTestsF16::DIM25_S16_ID,mgr);
+               ref.reload(StatsTestsF16::REF25_LOGSUMEXP_DOT_F16_ID,mgr);
+               output.create(ref.nbSamples(),StatsTestsF16::OUT_F16_ID,mgr);
+
+               const int16_t *dimsp  = dims.ptr();
+               this->nbPatterns=dimsp[0];
+
+               /* 12 is max vecDim as defined in Python script generating the data */
+               tmp.create(12,StatsTestsF16::TMP_F16_ID,mgr);
+            }
+            break;
+
+            /*
+
+            The max without index tests share MAXVALS_F16_ID with the
+            arm_max_f16 tests, so they must load the same sample counts
+            (7, 16 and 23) for the reference entries to apply.
+
+            */
+            case StatsTestsF16::TEST_MAX_NO_IDX_F16_26:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,7);
+
+               ref.reload(StatsTestsF16::MAXVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MAX_NO_IDX_F16_27:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,16);
+
+               ref.reload(StatsTestsF16::MAXVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MAX_NO_IDX_F16_28:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,23);
+
+               ref.reload(StatsTestsF16::MAXVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+#endif
+            /* Long tests : 100 samples, fourth entry of the references. */
+            case StatsTestsF16::TEST_MEAN_F16_29:
+            {
+               inputA.reload(StatsTestsF16::INPUT2_F16_ID,mgr,100);
+
+               ref.reload(StatsTestsF16::MEANVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 3;
+            }
+            break;
+
+            case StatsTestsF16::TEST_RMS_F16_30:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,100);
+
+               ref.reload(StatsTestsF16::RMSVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 3;
+            }
+            break;
+
+            case StatsTestsF16::TEST_STD_F16_31:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,100);
+
+               ref.reload(StatsTestsF16::STDVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 3;
+            }
+            break;
+
+            case StatsTestsF16::TEST_VAR_F16_32:
+            {
+               inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,100);
+
+               ref.reload(StatsTestsF16::VARVALS_F16_ID,mgr);
+
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 3;
+            }
+            break;
+        }
+
+    }
+
+    /*
+
+    Dump the results of the test identified by id through the pattern
+    manager.
+
+    The max and min tests also produce an index, which is dumped before
+    the value output. All the other tests only dump the value output.
+
+    */
+    void StatsTestsF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
+    {
+      switch(id)
+      {
+            case StatsTestsF16::TEST_MAX_F16_1:
+            case StatsTestsF16::TEST_MAX_F16_2:
+            case StatsTestsF16::TEST_MAX_F16_3:
+            case StatsTestsF16::TEST_MIN_F16_7:
+            case StatsTestsF16::TEST_MIN_F16_8:
+            case StatsTestsF16::TEST_MIN_F16_9:
+              index.dump(mgr);
+            break;
+
+            default:
+            break;
+      }
+
+      /* The value output exists for every test. */
+      output.dump(mgr);
+    }
--- a/Testing/desc_f16.txt
+++ b/Testing/desc_f16.txt
@ -5,6 +5,97 @@ group Root {
        class = DSPTests
        folder = DSP

+        group Statistics Tests {
+           class = StatsTests
+           folder = Stats
+
+           suite Statistics Tests F16 {
+              class = StatsTestsF16
+              folder = StatsF16
+
+              Pattern INPUT1_F16_ID : Input1_f16.txt 
+              Pattern INPUT2_F16_ID : Input2_f16.txt 
+              Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
+              Pattern MAXVALS_F16_ID : MaxVals1_f16.txt
+              Pattern MEANVALS_F16_ID : MeanVals2_f16.txt
+              Pattern MININDEXES_S16_ID : MinIndexes3_s16.txt
+              Pattern MINVALS_F16_ID : MinVals3_f16.txt
+              Pattern POWERVALS_F16_ID : PowerVals4_f16.txt
+              Pattern RMSVALS_F16_ID : RmsVals5_f16.txt
+              Pattern STDVALS_F16_ID : StdVals6_f16.txt
+              Pattern VARVALS_F16_ID : VarVals7_f16.txt
+
+              //Pattern INPUT22_F16_ID : Input22_f16.txt 
+              //Pattern DIM22_S16_ID : Dims22_s16.txt 
+              //Pattern REF22_ENTROPY_F16_ID : RefEntropy22_f16.txt
+//
+              //Pattern INPUT23_F16_ID : Input23_f16.txt 
+              //Pattern DIM23_S16_ID : Dims23_s16.txt 
+              //Pattern REF23_LOGSUMEXP_F16_ID : RefLogSumExp23_f16.txt
+//
+              //Pattern INPUTA24_F16_ID : InputA24_f16.txt 
+              //Pattern INPUTB24_F16_ID : InputB24_f16.txt
+              //Pattern DIM24_S16_ID : Dims24_s16.txt 
+              //Pattern REF24_KL_F16_ID : RefKL24_f16.txt
+//
+              //Pattern INPUTA25_F16_ID : InputA25_f16.txt 
+              //Pattern INPUTB25_F16_ID : InputB25_f16.txt
+              //Pattern DIM25_S16_ID : Dims25_s16.txt 
+              //Pattern REF25_LOGSUMEXP_DOT_F16_ID : RefLogSumExpDot25_f16.txt
+
+              Output  OUT_F16_ID : Output
+              Output  OUT_S16_ID : Index
+              Output  TMP_F16_ID : Temp
+
+              Functions {
+                Test nb=7    arm_max_f16:test_max_f16
+                Test nb=8n   arm_max_f16:test_max_f16
+                Test nb=8n+1 arm_max_f16:test_max_f16
+
+                Test nb=7    arm_mean_f16:test_mean_f16
+                Test nb=8n   arm_mean_f16:test_mean_f16
+                Test nb=8n+1 arm_mean_f16:test_mean_f16
+
+                Test nb=7    arm_min_f16:test_min_f16
+                Test nb=8n   arm_min_f16:test_min_f16
+                Test nb=8n+1 arm_min_f16:test_min_f16
+
+                Test nb=7    arm_power_f16:test_power_f16
+                Test nb=8n   arm_power_f16:test_power_f16
+                Test nb=8n+1 arm_power_f16:test_power_f16
+
+                Test nb=7    arm_rms_f16:test_rms_f16
+                Test nb=8n   arm_rms_f16:test_rms_f16
+                Test nb=8n+1 arm_rms_f16:test_rms_f16
+
+                Test nb=7    arm_std_f16:test_std_f16
+                Test nb=8n   arm_std_f16:test_std_f16
+                Test nb=8n+1 arm_std_f16:test_std_f16
+
+                Test nb=7    arm_var_f16:test_var_f16
+                Test nb=8n   arm_var_f16:test_var_f16
+                Test nb=8n+1 arm_var_f16:test_var_f16
+
+                disabled{arm_entropy_f16:test_entropy_f16}
+                disabled{arm_logsumexp_f16:test_logsumexp_f16}
+                disabled{arm_kullback_leibler_f16:test_kullback_leibler_f16}
+                disabled{arm_logsumexp_dot_prod_f16:test_logsumexp_dot_prod_f16}
+
+                disabled{Test nb=7    arm_max_no_idx_f16:test_max_no_idx_f16}
+                disabled{Test nb=8n   arm_max_no_idx_f16:test_max_no_idx_f16}
+                disabled{Test nb=8n+1 arm_max_no_idx_f16:test_max_no_idx_f16}
+
+                Test long  arm_mean_f16:test_mean_f16
+                Test long  arm_rms_f16:test_rms_f16
+                Test long  arm_std_f16:test_std_f16
+                Test long  arm_var_f16:test_var_f16
+
+                Test stability  arm_std_f16:test_std_stability_f16
+              }
+           }
+        }
+
+
        group Interpolation Tests{
          class = InterpolationTests
          folder = Interpolation