CMSIS-DSP: Added arm_fir_f16

Improved f16 complex dot product. Correction to compile flags for FFT tables for MVE version.
6 years ago · 7c55ae80f7
parent ebf9104c4e
commit 7c55ae80f7
23 changed files with 5918 additions and 15 deletions
--- a/Include/dsp/filtering_functions_f16.h
+++ b/Include/dsp/filtering_functions_f16.h
@ -26,12 +26,59 @@
 #ifndef _FILTERING_FUNCTIONS_F16_H_
 #define _FILTERING_FUNCTIONS_F16_H_

+#include "arm_math_types_f16.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+
 #ifdef   __cplusplus
 extern "C"
 {
 #endif

 #if defined(ARM_FLOAT16_SUPPORTED)
+
+ /**
+   * @brief Instance structure for the half-precision (f16) floating-point FIR filter.
+   *        The fields are filled in by arm_fir_init_f16().
+   */
+  typedef struct
+  {
+          uint16_t numTaps;     /**< number of filter coefficients in the filter. */
+          float16_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const float16_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+  } arm_fir_instance_f16;
+
+  /**
+   * @brief  Initialization function for the half-precision (f16) floating-point FIR filter.
+   * @param[in,out] S          points to an instance of the f16 floating-point FIR filter structure.
+   * @param[in]     numTaps    Number of filter coefficients in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer (numTaps+blockSize-1 samples).
+   * @param[in]     blockSize  number of samples that are processed at a time.
+   */
+  void arm_fir_init_f16(
+        arm_fir_instance_f16 * S,
+        uint16_t numTaps,
+  const float16_t * pCoeffs,
+        float16_t * pState,
+        uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the half-precision (f16) floating-point FIR filter.
+   * @param[in]  S          points to an instance of the f16 floating-point FIR structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void arm_fir_f16(
+  const arm_fir_instance_f16 * S,
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
--- a/Platforms/FVP/ARMv81MML/system_ARMv81MML.c
+++ b/Platforms/FVP/ARMv81MML/system_ARMv81MML.c
@ -299,6 +299,10 @@ void SystemInit (void)
  SCB->CCR |= SCB_CCR_UNALIGN_TRP_Msk;
 #endif

+  unsigned int  fpscr =__get_FPSCR();
+  fpscr = fpscr & (~FPU_FPDSCR_AHP_Msk);
+  __set_FPSCR(fpscr);
+

  // enable DL branch cache
  CCR |= CCR_DL;
--- a/Platforms/IPSS/ARMv81MML/system_ARMv81MML.c
+++ b/Platforms/IPSS/ARMv81MML/system_ARMv81MML.c
@ -299,6 +299,9 @@ void SystemInit (void)
  SCB->CCR |= SCB_CCR_UNALIGN_TRP_Msk;
 #endif

+  unsigned int  fpscr =__get_FPSCR();
+  fpscr = fpscr & (~FPU_FPDSCR_AHP_Msk);
+  __set_FPSCR(fpscr);

  // enable DL branch cache
  CCR |= CCR_DL;
--- a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f16.c
@ -143,8 +143,8 @@ void arm_cmplx_dot_prod_f16(
        float16_t * imagResult)
 {
        uint32_t blkCnt;                               /* Loop counter */
-        float16_t real_sum = 0.0f, imag_sum = 0.0f;    /* Temporary result variables */
-        float16_t a0,b0,c0,d0;
+        _Float16 real_sum = 0.0f, imag_sum = 0.0f;    /* Temporary result variables */
+        _Float16 a0,b0,c0,d0;

 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

--- a/Source/FilteringFunctions/CMakeLists.txt
+++ b/Source/FilteringFunctions/CMakeLists.txt
@ -123,6 +123,10 @@ target_sources(CMSISDSPFiltering PRIVATE arm_lms_norm_q31.c)
 target_sources(CMSISDSPFiltering PRIVATE arm_lms_q15.c)
 target_sources(CMSISDSPFiltering PRIVATE arm_lms_q31.c)

+if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
+target_sources(CMSISDSPFiltering PRIVATE arm_fir_f16.c)
+target_sources(CMSISDSPFiltering PRIVATE arm_fir_init_f16.c)
+endif()

 ### Includes
 target_include_directories(CMSISDSPFiltering PUBLIC "${DSP}/Include")
--- a/Source/FilteringFunctions/FilteringFunctionsF16.c
+++ b/Source/FilteringFunctions/FilteringFunctionsF16.c
@ -0,0 +1,28 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        FilteringFunctionsF16.c
+ * Description:  Combination of all filtering function f16 source files.
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_fir_f16.c"
+#include "arm_fir_init_f16.c"
--- a/Source/FilteringFunctions/arm_fir_f16.c
+++ b/Source/FilteringFunctions/arm_fir_f16.c
@ -0,0 +1,852 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_fir_f16.c
+ * Description:  Floating-point FIR filter processing function
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/filtering_functions_f16.h"
+
+/**
+  @ingroup groupFilters
+ */
+
+
+/**
+  @addtogroup FIR
+  @{
+ */
+
+/**
+  @brief         Processing function for floating-point FIR filter.
+  @param[in]     S          points to an instance of the floating-point FIR filter structure
+  @param[in]     pSrc       points to the block of input data
+  @param[out]    pDst       points to the block of output data
+  @param[in]     blockSize  number of samples to process
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#define FIR_F16_CORE(pSamples, c, NB_TAPS)                                 \
+        vecAcc0 = vdupq_n_f16(0.0f16);                                     \
+        for (int i = 0; i < NB_TAPS; i++) {                                \
+            vecIn0 = vld1q(&pSamples[i]);                                  \
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c[i]);                        \
+        } /* FIR_F16_CORE: clears vecAcc0, then accumulates c[i] * (8 samples starting at pSamples[i]) for i < NB_TAPS; vecAcc0 and vecIn0 must be declared by the caller */
+
+static void arm_fir_f16_1_4_mve(const arm_fir_instance_f16 * S, const float16_t * pSrc, float16_t * pDst, uint32_t blockSize)
+{   /* Helium (MVE) FIR kernel: always evaluates NB_TAPS = 4 taps and produces 8 outputs per vector pass */
+    float16_t      *pState = S->pState;     /* State pointer */
+    const float16_t *pCoeffs = S->pCoeffs;  /* Coefficient pointer */
+    float16_t      *pStateCur;              /* Points to the current sample of the state */
+    const float16_t *pSamples;              /* Temporary pointer to the sample buffer */
+    float16_t      *pOutput;                /* Temporary pointer to the output buffer */
+    const float16_t *pTempSrc;              /* Temporary pointer to the source data */
+    float16_t      *pTempDest;              /* Temporary pointer to the destination buffer */
+    uint32_t        numTaps = S->numTaps;   /* Number of filter coefficients in the filter */
+    int32_t         blkCnt;                 /* Loop counter */
+    f16x8_t         vecIn0;                 /* 8 consecutive samples (used by FIR_F16_CORE) */
+    f16x8_t         vecAcc0;                /* 8 output accumulators (used by FIR_F16_CORE) */
+    const int       NB_TAPS=4;              /* Number of taps evaluated by FIR_F16_CORE */
+    float16_t       c[NB_TAPS];             /* Local copy of the coefficients */
+
+
+    /*
+     * pState points to state array which contains previous frame (numTaps - 1) samples
+     * pStateCur points to the location where the new input data should be written
+     */
+    pStateCur = &(pState[(numTaps - 1u)]);
+    /*
+     * Copy new data into state so that we obtain a continuous sample buffer
+     * containing both the tail end of the old data and the new data.
+     */
+    pSamples = pState;
+    pTempSrc = pSrc;
+    pOutput = pDst;
+
+    for (int i = 0; i < NB_TAPS; i++)
+        c[i] = pCoeffs[i]; /* NB_TAPS coefficients are always read, even when numTaps is smaller */
+
+    blkCnt = blockSize >> 3; /* Number of full 8-sample vectors */
+    while (blkCnt > 0) {
+        /*
+         * Save 8 input samples in the history buffer
+         */
+        vst1q(pStateCur, vld1q(pTempSrc));
+        pStateCur += 8;
+        pTempSrc += 8;
+
+        FIR_F16_CORE(pSamples, c, NB_TAPS);
+
+        vst1q(pOutput, vecAcc0);
+
+        pOutput += 8;
+        pSamples += 8;
+
+        blkCnt--;
+    }
+
+    blkCnt = blockSize & 7; /* Residual sample count (0..7); only used to build the output predicate below */
+    {
+        mve_pred16_t    p0 = vctp16q(blkCnt);
+
+        vst1q(pStateCur, vld1q(pTempSrc)); /* NOTE(review): this block is not guarded by blkCnt > 0 and this
+                                            * load/store is a full 8 lanes, not predicated by p0. It looks like
+                                            * it reads past the last blockSize input samples and writes past the
+                                            * numTaps+blockSize-1 state length documented in the header; confirm
+                                            * that callers pad both buffers. */
+        pStateCur += 8;
+        pTempSrc += 8;
+
+        FIR_F16_CORE(pSamples, c, NB_TAPS);
+
+        vstrhq_p_f16(pOutput, vecAcc0, p0); /* Only the output store is predicated */
+    }
+
+    /*
+     * Copy the samples back into the history buffer start
+     * (the source starts at pState[blockSize]; numTaps samples are moved:
+     * numTaps >> 3 full vectors, then a predicated store of numTaps & 7 samples
+     * whose load is still a full 8-lane load)
+     */
+    pTempSrc = &pState[blockSize];
+    pTempDest = pState;
+
+    blkCnt = numTaps >> 3;
+    while (blkCnt > 0) {
+        vst1q(pTempDest, vld1q(pTempSrc));
+        pTempSrc += 8;
+        pTempDest += 8;
+        blkCnt--;
+    }
+    blkCnt = numTaps & 7;
+    if (blkCnt > 0) {
+        mve_pred16_t    p0 = vctp16q(blkCnt);
+        vstrhq_p_f16(pTempDest, vld1q(pTempSrc), p0);
+    }
+
+}
+
+
+static void arm_fir_f16_5_8_mve(const arm_fir_instance_f16 * S, const float16_t * pSrc, float16_t * pDst, uint32_t blockSize)
+{   /* Helium (MVE) FIR kernel: always evaluates NB_TAPS = 8 taps and produces 8 outputs per vector pass */
+    float16_t      *pState = S->pState;     /* State pointer */
+    const float16_t *pCoeffs = S->pCoeffs;  /* Coefficient pointer */
+    float16_t      *pStateCur;              /* Points to the current sample of the state */
+    const float16_t *pSamples;              /* Temporary pointer to the sample buffer */
+    float16_t      *pOutput;                /* Temporary pointer to the output buffer */
+    const float16_t *pTempSrc;              /* Temporary pointer to the source data */
+    float16_t      *pTempDest;              /* Temporary pointer to the destination buffer */
+    uint32_t        numTaps = S->numTaps;   /* Number of filter coefficients in the filter */
+    int32_t         blkCnt;                 /* Loop counter */
+    f16x8_t         vecIn0;                 /* 8 consecutive samples (used by FIR_F16_CORE) */
+    f16x8_t         vecAcc0;                /* 8 output accumulators (used by FIR_F16_CORE) */
+    const int       NB_TAPS=8;              /* Number of taps evaluated by FIR_F16_CORE */
+    float16_t       c[NB_TAPS];             /* Local copy of the coefficients */
+
+
+    /*
+     * pState points to state array which contains previous frame (numTaps - 1) samples
+     * pStateCur points to the location where the new input data should be written
+     */
+    pStateCur = &(pState[(numTaps - 1u)]);
+    /*
+     * Copy new data into state so that we obtain a continuous sample buffer
+     * containing both the tail end of the old data and the new data.
+     */
+    pSamples = pState;
+    pTempSrc = pSrc;
+    pOutput = pDst;
+
+    for (int i = 0; i < NB_TAPS; i++)
+        c[i] = pCoeffs[i]; /* NB_TAPS coefficients are always read, even when numTaps is smaller */
+
+    blkCnt = blockSize >> 3; /* Number of full 8-sample vectors */
+    while (blkCnt > 0) {
+        /*
+         * Save 8 input samples in the history buffer
+         */
+        vst1q(pStateCur, vld1q(pTempSrc));
+        pStateCur += 8;
+        pTempSrc += 8;
+
+        FIR_F16_CORE(pSamples, c, NB_TAPS);
+
+        vst1q(pOutput, vecAcc0);
+
+        pOutput += 8;
+        pSamples += 8;
+
+        blkCnt--;
+    }
+
+    blkCnt = blockSize & 7; /* Residual sample count (0..7); only used to build the output predicate below */
+    {
+        mve_pred16_t    p0 = vctp16q(blkCnt);
+
+        vst1q(pStateCur, vld1q(pTempSrc)); /* NOTE(review): this block is not guarded by blkCnt > 0 and this
+                                            * load/store is a full 8 lanes, not predicated by p0. It looks like
+                                            * it reads past the last blockSize input samples and writes past the
+                                            * numTaps+blockSize-1 state length documented in the header; confirm
+                                            * that callers pad both buffers. */
+        pStateCur += 8;
+        pTempSrc += 8;
+
+        FIR_F16_CORE(pSamples, c, NB_TAPS);
+
+        vstrhq_p_f16(pOutput, vecAcc0, p0); /* Only the output store is predicated */
+    }
+
+    /*
+     * Copy the samples back into the history buffer start
+     * (the source starts at pState[blockSize]; numTaps samples are moved:
+     * numTaps >> 3 full vectors, then a predicated store of numTaps & 7 samples
+     * whose load is still a full 8-lane load)
+     */
+    pTempSrc = &pState[blockSize];
+    pTempDest = pState;
+
+    blkCnt = numTaps >> 3;
+    while (blkCnt > 0) {
+        vst1q(pTempDest, vld1q(pTempSrc));
+        pTempSrc += 8;
+        pTempDest += 8;
+        blkCnt--;
+    }
+    blkCnt = numTaps & 7;
+    if (blkCnt > 0) {
+        mve_pred16_t    p0 = vctp16q(blkCnt);
+        vstrhq_p_f16(pTempDest, vld1q(pTempSrc), p0);
+    }
+}
+
+
+void arm_fir_f16(const arm_fir_instance_f16 * S, 
+  const float16_t * pSrc, 
+  float16_t * pDst, 
+  uint32_t blockSize)
+{   /* Helium (MVE) implementation: dispatches small filters to the specialized kernels, otherwise processes 8 outputs per pass */
+    float16_t *pState = S->pState;  /* State pointer */
+    const float16_t *pCoeffs = S->pCoeffs;    /* Coefficient pointer */
+    float16_t *pStateCur;       /* Points to the current sample of the state */
+    const float16_t *pSamples;        /* Temporary pointer to the sample buffer */
+    float16_t *pOutput;         /* Temporary pointer to the output buffer */
+    const float16_t *pTempSrc;        /* Temporary pointer to the source data */
+    float16_t *pTempDest;       /* Temporary pointer to the destination buffer */
+    int32_t  numTaps = S->numTaps; /* Number of filter coefficients in the filter */
+    uint32_t  blkCnt;           /* Loop counter */
+    f16x8_t vecIn0;             /* 8 consecutive samples */
+    f16x8_t vecAcc0;            /* 8 output accumulators */
+    float16_t c0, c1, c2, c3;   /* Current group of 8 coefficients */
+    float16_t c4, c5, c6, c7;
+
+    /*
+     * [1 to 8 taps] specialized routines
+     * (numTaps <= 4 uses the 4-tap kernel, 5 to 8 taps use the 8-tap kernel)
+     */
+    if (numTaps <= 4)
+    {
+        arm_fir_f16_1_4_mve(S, pSrc, pDst, blockSize);
+        return;
+    }
+    else if (numTaps <= 8)
+    {
+        arm_fir_f16_5_8_mve(S, pSrc, pDst, blockSize);
+        return;
+    }
+
+    /*
+     * pState points to state array which contains previous frame (numTaps - 1) samples
+     * pStateCur points to the location where the new input data should be written
+     */
+    pStateCur = &(pState[(numTaps - 1u)]);
+    /*
+     * Copy new data into state so that we obtain a continuous sample buffer
+     * containing both the tail end of the old data and the new data.
+     */
+    pSamples = pState;
+    pTempSrc = pSrc;
+    pOutput = pDst;
+
+    blkCnt = blockSize >> 3; /* Number of full 8-sample vectors */
+    while (blkCnt > 0U)
+    {
+        int       i;
+        const float16_t *pCoeffsCur = pCoeffs;
+
+        /*
+         * Save 8 input samples in the history buffer
+         */
+        vst1q(pStateCur, vld1q(pTempSrc));
+        pStateCur += 8;
+        pTempSrc += 8;
+
+        c0 = *pCoeffsCur++; /* First group of 8 coefficients */
+        c1 = *pCoeffsCur++;
+        c2 = *pCoeffsCur++;
+        c3 = *pCoeffsCur++;
+        c4 = *pCoeffsCur++;
+        c5 = *pCoeffsCur++;
+        c6 = *pCoeffsCur++;
+        c7 = *pCoeffsCur++;
+
+        vecIn0 = vld1q(pSamples);
+        vecAcc0 = vmulq(vecIn0, c0); /* The first tap initializes the accumulator */
+
+        vecIn0 = vld1q(&pSamples[1]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c1);
+
+        vecIn0 = vld1q(&pSamples[2]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c2);
+
+        vecIn0 = vld1q(&pSamples[3]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c3);
+
+        vecIn0 = vld1q(&pSamples[4]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c4);
+
+        vecIn0 = vld1q(&pSamples[5]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c5);
+
+        vecIn0 = vld1q(&pSamples[6]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c6);
+
+        vecIn0 = vld1q(&pSamples[7]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c7);
+
+        pSamples += 8;
+
+        for (i = 0; i <= ((numTaps - 9) / 8); i++) /* One pass per further group of 8 taps: the number of taps evaluated is numTaps rounded up to a multiple of 8 */
+        {
+            c0 = *pCoeffsCur++;
+            c1 = *pCoeffsCur++;
+            c2 = *pCoeffsCur++;
+            c3 = *pCoeffsCur++;
+            c4 = *pCoeffsCur++;
+            c5 = *pCoeffsCur++;
+            c6 = *pCoeffsCur++;
+            c7 = *pCoeffsCur++;
+
+            vecIn0 = vld1q(pSamples);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c0);
+
+            vecIn0 = vld1q(&pSamples[1]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c1);
+
+            vecIn0 = vld1q(&pSamples[2]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c2);
+
+            vecIn0 = vld1q(&pSamples[3]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c3);
+
+            vecIn0 = vld1q(&pSamples[4]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c4);
+
+            vecIn0 = vld1q(&pSamples[5]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c5);
+
+            vecIn0 = vld1q(&pSamples[6]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c6);
+
+            vecIn0 = vld1q(&pSamples[7]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c7);
+
+            pSamples += 8;
+        }
+
+        vst1q(pOutput, vecAcc0);
+        pOutput += 8;
+        pSamples = pSamples - (i + 1) * 8 + 8; /* Rewind to the start of this block, then step to the next 8 outputs */
+
+        blkCnt--;
+    }
+
+    blkCnt = blockSize & 7; /* Residual sample count (0..7); only used to build the output predicate below */
+    {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+        int       i;
+        const float16_t *pCoeffsCur = pCoeffs;
+
+        vst1q(pStateCur, vld1q(pTempSrc)); /* NOTE(review): this block is not guarded by blkCnt > 0 and this
+                                            * load/store is a full 8 lanes, not predicated by p0. It looks like
+                                            * it reads past the last blockSize input samples and writes past the
+                                            * numTaps+blockSize-1 state length documented in the header; confirm
+                                            * that callers pad both buffers. */
+        pStateCur += 8;
+        pTempSrc += 8;
+
+        c0 = *pCoeffsCur++;
+        c1 = *pCoeffsCur++;
+        c2 = *pCoeffsCur++;
+        c3 = *pCoeffsCur++;
+        c4 = *pCoeffsCur++;
+        c5 = *pCoeffsCur++;
+        c6 = *pCoeffsCur++;
+        c7 = *pCoeffsCur++;
+
+        vecIn0 = vld1q(pSamples);
+        vecAcc0 = vmulq(vecIn0, c0);
+
+        vecIn0 = vld1q(&pSamples[1]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c1);
+
+        vecIn0 = vld1q(&pSamples[2]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c2);
+
+        vecIn0 = vld1q(&pSamples[3]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c3);
+
+        vecIn0 = vld1q(&pSamples[4]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c4);
+
+        vecIn0 = vld1q(&pSamples[5]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c5);
+
+        vecIn0 = vld1q(&pSamples[6]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c6);
+
+        vecIn0 = vld1q(&pSamples[7]);
+        vecAcc0 = vfmaq(vecAcc0, vecIn0, c7);
+
+        pSamples += 8;
+
+        for (i = 0; i <= ((numTaps - 9) / 8); i++) /* Same tap grouping as in the main loop */
+        {
+            c0 = *pCoeffsCur++;
+            c1 = *pCoeffsCur++;
+            c2 = *pCoeffsCur++;
+            c3 = *pCoeffsCur++;
+            c4 = *pCoeffsCur++;
+            c5 = *pCoeffsCur++;
+            c6 = *pCoeffsCur++;
+            c7 = *pCoeffsCur++;
+
+            vecIn0 = vld1q(pSamples);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c0);
+
+            vecIn0 = vld1q(&pSamples[1]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c1);
+
+            vecIn0 = vld1q(&pSamples[2]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c2);
+
+            vecIn0 = vld1q(&pSamples[3]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c3);
+
+            vecIn0 = vld1q(&pSamples[4]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c4);
+
+            vecIn0 = vld1q(&pSamples[5]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c5);
+
+            vecIn0 = vld1q(&pSamples[6]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c6);
+
+            vecIn0 = vld1q(&pSamples[7]);
+            vecAcc0 = vfmaq(vecAcc0, vecIn0, c7);
+
+            pSamples += 8;
+        }
+
+        vstrhq_p_f16(pOutput, vecAcc0, p0); /* Only the output store is predicated */
+    }
+
+    /*
+     * Copy the samples back into the history buffer start
+     * (the source starts at pState[blockSize]; numTaps samples are moved:
+     * numTaps >> 3 full vectors, then a predicated store of numTaps & 7 samples
+     * whose load is still a full 8-lane load)
+     */
+    pTempSrc = &pState[blockSize];
+    pTempDest = pState;
+
+    blkCnt = numTaps >> 3;
+    while (blkCnt > 0U)
+    {
+        vst1q(pTempDest, vld1q(pTempSrc));
+        pTempSrc += 8;
+        pTempDest += 8;
+        blkCnt--;
+    }
+    blkCnt = numTaps & 7;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+        vstrhq_p_f16(pTempDest, vld1q(pTempSrc), p0);
+    }
+}
+
+#else
+
+void arm_fir_f16(
+  const arm_fir_instance_f16 * S,
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize)
+{
+        float16_t *pState = S->pState;                 /* State pointer */
+  const float16_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
+        float16_t *pStateCurnt;                        /* Points to the current sample of the state */
+        float16_t *px;                                 /* Temporary pointer for state buffer */
+  const float16_t *pb;                                 /* Temporary pointer for coefficient buffer */
+        _Float16 acc0;                                /* Accumulator */
+        uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
+        uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+        _Float16 acc1, acc2, acc3, acc4, acc5, acc6, acc7;     /* Accumulators */
+        _Float16 x0, x1, x2, x3, x4, x5, x6, x7;               /* Temporary variables to hold state values */
+        _Float16 c0;                                           /* Temporary variable to hold coefficient value */
+#endif
+
+  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
+  /* pStateCurnt points to the location where the new input data should be written */
+  pStateCurnt = &(S->pState[(numTaps - 1U)]);
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 8 output values simultaneously.
+   * The variables acc0 ... acc7 hold output values that are being computed:
+   *
+   *    acc0 =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
+   *    acc1 =  b[numTaps-1] * x[n-numTaps]   + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
+   *    acc2 =  b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps]   + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
+   *    acc3 =  b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps]   +...+ b[0] * x[3]
+   *
+   *    acc4 ... acc7 follow the same pattern, each shifted by one more sample.
+   */
+
+  blkCnt = blockSize >> 3U;
+
+  while (blkCnt > 0U)
+  {
+    /* Copy the first 4 of the 8 new input samples into the state buffer. */
+    *pStateCurnt++ = *pSrc++;
+    *pStateCurnt++ = *pSrc++;
+    *pStateCurnt++ = *pSrc++;
+    *pStateCurnt++ = *pSrc++;
+
+    /* Set all accumulators to zero */
+    acc0 = 0.0f;
+    acc1 = 0.0f;
+    acc2 = 0.0f;
+    acc3 = 0.0f;
+    acc4 = 0.0f;
+    acc5 = 0.0f;
+    acc6 = 0.0f;
+    acc7 = 0.0f;
+
+    /* Initialize state pointer */
+    px = pState;
+
+    /* Initialize coefficient pointer */
+    pb = pCoeffs;
+
+    /* Copy the remaining 4 new input samples into the state buffer.
+     * This is separated from the others to avoid
+     * a call to __aeabi_memmove which would be slower
+     */
+    *pStateCurnt++ = *pSrc++;
+    *pStateCurnt++ = *pSrc++;
+    *pStateCurnt++ = *pSrc++;
+    *pStateCurnt++ = *pSrc++;
+
+    /* Read the first 7 samples from the state buffer into x0 ... x6 */
+    x0 = *px++;
+    x1 = *px++;
+    x2 = *px++;
+    x3 = *px++;
+    x4 = *px++;
+    x5 = *px++;
+    x6 = *px++;
+
+    /* Loop unrolling: process 8 taps at a time. */
+    tapCnt = numTaps >> 3U;
+
+    while (tapCnt > 0U)
+    {
+      /* Read the next coefficient (1st of this group of 8) */
+      c0 = *(pb++);
+
+      /* Read the next state sample into x7 */
+      x7 = *(px++);
+
+      /* acc0 += c0 * x0 */
+      acc0 += x0 * c0;
+
+      /* acc1 += c0 * x1 */
+      acc1 += x1 * c0;
+
+      /* acc2 += c0 * x2 */
+      acc2 += x2 * c0;
+
+      /* acc3 += c0 * x3 */
+      acc3 += x3 * c0;
+
+      /* acc4 += c0 * x4 */
+      acc4 += x4 * c0;
+
+      /* acc5 += c0 * x5 */
+      acc5 += x5 * c0;
+
+      /* acc6 += c0 * x6 */
+      acc6 += x6 * c0;
+
+      /* acc7 += c0 * x7 */
+      acc7 += x7 * c0;
+
+      /* Read the next coefficient (2nd of this group) */
+      c0 = *(pb++);
+
+      /* Read the next state sample into x0 (the oldest value is no longer needed) */
+      x0 = *(px++);
+
+      /* Perform the multiply-accumulate */
+      acc0 += x1 * c0;
+      acc1 += x2 * c0;
+      acc2 += x3 * c0;
+      acc3 += x4 * c0;
+      acc4 += x5 * c0;
+      acc5 += x6 * c0;
+      acc6 += x7 * c0;
+      acc7 += x0 * c0;
+
+      /* Read the next coefficient (3rd of this group) */
+      c0 = *(pb++);
+
+      /* Read the next state sample into x1 */
+      x1 = *(px++);
+
+      /* Perform the multiply-accumulates */
+      acc0 += x2 * c0;
+      acc1 += x3 * c0;
+      acc2 += x4 * c0;
+      acc3 += x5 * c0;
+      acc4 += x6 * c0;
+      acc5 += x7 * c0;
+      acc6 += x0 * c0;
+      acc7 += x1 * c0;
+
+      /* Read the next coefficient (4th of this group) */
+      c0 = *(pb++);
+
+      /* Read the next state sample into x2 */
+      x2 = *(px++);
+
+      /* Perform the multiply-accumulates */
+      acc0 += x3 * c0;
+      acc1 += x4 * c0;
+      acc2 += x5 * c0;
+      acc3 += x6 * c0;
+      acc4 += x7 * c0;
+      acc5 += x0 * c0;
+      acc6 += x1 * c0;
+      acc7 += x2 * c0;
+
+      /* Read the next coefficient (5th of this group) */
+      c0 = *(pb++);
+
+      /* Read the next state sample into x3 */
+      x3 = *(px++);
+      /* Perform the multiply-accumulates */
+      acc0 += x4 * c0;
+      acc1 += x5 * c0;
+      acc2 += x6 * c0;
+      acc3 += x7 * c0;
+      acc4 += x0 * c0;
+      acc5 += x1 * c0;
+      acc6 += x2 * c0;
+      acc7 += x3 * c0;
+
+      /* Read the next coefficient (6th of this group) */
+      c0 = *(pb++);
+
+      /* Read the next state sample into x4 */
+      x4 = *(px++);
+
+      /* Perform the multiply-accumulates */
+      acc0 += x5 * c0;
+      acc1 += x6 * c0;
+      acc2 += x7 * c0;
+      acc3 += x0 * c0;
+      acc4 += x1 * c0;
+      acc5 += x2 * c0;
+      acc6 += x3 * c0;
+      acc7 += x4 * c0;
+
+      /* Read the next coefficient (7th of this group) */
+      c0 = *(pb++);
+
+      /* Read the next state sample into x5 */
+      x5 = *(px++);
+
+      /* Perform the multiply-accumulates */
+      acc0 += x6 * c0;
+      acc1 += x7 * c0;
+      acc2 += x0 * c0;
+      acc3 += x1 * c0;
+      acc4 += x2 * c0;
+      acc5 += x3 * c0;
+      acc6 += x4 * c0;
+      acc7 += x5 * c0;
+
+      /* Read the next coefficient (8th of this group) */
+      c0 = *(pb++);
+
+      /* Read the next state sample into x6 */
+      x6 = *(px++);
+
+      /* Perform the multiply-accumulates */
+      acc0 += x7 * c0;
+      acc1 += x0 * c0;
+      acc2 += x1 * c0;
+      acc3 += x2 * c0;
+      acc4 += x3 * c0;
+      acc5 += x4 * c0;
+      acc6 += x5 * c0;
+      acc7 += x6 * c0;
+
+      /* Decrement loop counter */
+      tapCnt--;
+    }
+
+    /* Loop unrolling: process the remaining (numTaps % 8) taps one at a time */
+    tapCnt = numTaps % 0x8U;
+
+    while (tapCnt > 0U)
+    {
+      /* Read coefficients */
+      c0 = *(pb++);
+
+      /* Fetch 1 state variable */
+      x7 = *(px++);
+
+      /* Perform the multiply-accumulates */
+      acc0 += x0 * c0;
+      acc1 += x1 * c0;
+      acc2 += x2 * c0;
+      acc3 += x3 * c0;
+      acc4 += x4 * c0;
+      acc5 += x5 * c0;
+      acc6 += x6 * c0;
+      acc7 += x7 * c0;
+
+      /* Reuse the present sample states for next sample */
+      x0 = x1;
+      x1 = x2;
+      x2 = x3;
+      x3 = x4;
+      x4 = x5;
+      x5 = x6;
+      x6 = x7;
+
+      /* Decrement loop counter */
+      tapCnt--;
+    }
+
+    /* Advance the state pointer by 8 to process the next group of 8 samples */
+    pState = pState + 8;
+
+    /* The results in the 8 accumulators, store in the destination buffer. */
+    *pDst++ = acc0;
+    *pDst++ = acc1;
+    *pDst++ = acc2;
+    *pDst++ = acc3;
+    *pDst++ = acc4;
+    *pDst++ = acc5;
+    *pDst++ = acc6;
+    *pDst++ = acc7;
+
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining output samples */
+  blkCnt = blockSize % 0x8U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* Copy one sample at a time into state buffer */
+    *pStateCurnt++ = *pSrc++;
+
+    /* Set the accumulator to zero */
+    acc0 = 0.0f;
+
+    /* Initialize state pointer */
+    px = pState;
+
+    /* Initialize Coefficient pointer */
+    pb = pCoeffs;
+
+    i = numTaps;
+
+    /* Perform the multiply-accumulates */
+    while (i > 0U)
+    {
+      /* acc =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
+      acc0 += *px++ * *pb++;
+
+      i--;
+    }
+
+    /* Store result in destination buffer. */
+    *pDst++ = acc0;
+
+    /* Advance state pointer by 1 for the next sample */
+    pState = pState + 1U;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Processing is complete.
+     Now copy the last numTaps - 1 samples to the start of the state buffer.
+     This prepares the state buffer for the next function call. */
+
+  /* Points to the start of the state buffer */
+  pStateCurnt = S->pState;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Copy 4 samples at a time */
+  tapCnt = (numTaps - 1U) >> 2U;
+
+  /* Copy data */
+  while (tapCnt > 0U)
+  {
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+
+    /* Decrement loop counter */
+    tapCnt--;
+  }
+
+  /* Calculate remaining number of copies */
+  tapCnt = (numTaps - 1U) % 0x4U;
+
+#else
+
+  /* Initialize tapCnt with the number of samples to copy (numTaps - 1) */
+  tapCnt = (numTaps - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  /* Copy remaining data */
+  while (tapCnt > 0U)
+  {
+    *pStateCurnt++ = *pState++;
+
+    /* Decrement loop counter */
+    tapCnt--;
+  }
+
+}
+
+#endif /* #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+* @} end of FIR group
+*/
--- a/Source/FilteringFunctions/arm_fir_init_f16.c
+++ b/Source/FilteringFunctions/arm_fir_init_f16.c
@ -0,0 +1,86 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_fir_init_f16.c
+ * Description:  Floating-point FIR filter initialization function
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/filtering_functions_f16.h"
+
+/**
+  @ingroup groupFilters
+ */
+
+/**
+  @addtogroup FIR
+  @{
+ */
+
+/**
+  @brief         Initialization function for the half-precision (f16) floating-point FIR filter.
+  @param[in,out] S          points to an instance of the f16 floating-point FIR filter structure
+  @param[in]     numTaps    number of filter coefficients in the filter
+  @param[in]     pCoeffs    points to the filter coefficients buffer
+  @param[in]     pState     points to the state buffer
+  @param[in]     blockSize  number of samples processed per call
+  @return        none
+
+  @par           Details
+                   <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:
+  <pre>
+      {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+  </pre>
+  @par
+                   <code>pState</code> points to the array of state variables.
+                   <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_f16()</code>.
+                   The state buffer is cleared by this function.
+  @par          Initialization of Helium version
+                 For Helium version the length of the array of coefficients must be a multiple of 16 even if less
+                 than 16 coefficients are used. The additional coefficients must be set to 0.
+                 It does not mean that all the coefficients will be used in the filter (numTaps
+                 is still set to its right value in the init function.) It just means that
+                 the implementation may require to read more coefficients due to the vectorization and
+                 to avoid having to manage too many different cases in the code.
+
+ */
+
+void arm_fir_init_f16(
+        arm_fir_instance_f16 * S,
+        uint16_t numTaps,
+  const float16_t * pCoeffs,
+        float16_t * pState,
+        uint32_t blockSize)
+{
+  /* Assign filter taps */
+  S->numTaps = numTaps;
+
+  /* Assign coefficient pointer */
+  S->pCoeffs = pCoeffs;
+
+  /* Clear state buffer. The size is always (blockSize + numTaps - 1) */
+  memset(pState, 0, (numTaps + (blockSize - 1U)) * sizeof(float16_t));
+
+  /* Assign state pointer */
+  S->pState = pState;
+}
+
+/**
+  @} end of FIR group
+ */
--- a/Source/TransformFunctions/arm_cfft_init_f16.c
+++ b/Source/TransformFunctions/arm_cfft_init_f16.c
@ -64,7 +64,8 @@ arm_status arm_cfft_radix4by2_rearrange_twiddles_f16(arm_cfft_instance_f16 *S, i
                                                                  
        switch (S->fftLen >> (twidCoefModifier - 1)) {  

-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
+            || defined(ARM_TABLE_TWIDDLECOEF_F16_4096)
        case 4096U:                                                                                
            S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_4096_f16;
            S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_4096_f16;     
@ -77,7 +78,8 @@ arm_status arm_cfft_radix4by2_rearrange_twiddles_f16(arm_cfft_instance_f16 *S, i
            break; 
 #endif                                  

-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024) || defined(ARM_TABLE_BITREVIDX_FXT_2048)                                                                                                   
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
+            || defined(ARM_TABLE_TWIDDLECOEF_F16_1024) || defined(ARM_TABLE_TWIDDLECOEF_F16_2048)                                                                                                  
        case 1024U:                                                                                
            S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_1024_f16;
            S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_1024_f16;     
@ -90,7 +92,8 @@ arm_status arm_cfft_radix4by2_rearrange_twiddles_f16(arm_cfft_instance_f16 *S, i
            break;                                                                                 
 #endif 

- #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256) || defined(ARM_TABLE_BITREVIDX_FXT_512)                                                                                                  
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
+    || defined(ARM_TABLE_TWIDDLECOEF_F16_256) || defined(ARM_TABLE_TWIDDLECOEF_F16_512)                                                                                              
        case 256U:                                                                                 
            S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_256_f16;
            S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_256_f16;     
@ -104,7 +107,8 @@ arm_status arm_cfft_radix4by2_rearrange_twiddles_f16(arm_cfft_instance_f16 *S, i
            break;                     
 #endif 

-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64) || defined(ARM_TABLE_BITREVIDX_FXT_128)
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
+            || defined(ARM_TABLE_TWIDDLECOEF_F16_64) || defined(ARM_TABLE_TWIDDLECOEF_F16_128)
        case 64U:                                                                                  
            S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_64_f16;
            S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_64_f16;     
@ -117,7 +121,8 @@ arm_status arm_cfft_radix4by2_rearrange_twiddles_f16(arm_cfft_instance_f16 *S, i
            break;  
 #endif                                                                               
              
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16) || defined(ARM_TABLE_BITREVIDX_FXT_32)                                                                                                                                                                                                                
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
+            || defined(ARM_TABLE_TWIDDLECOEF_F16_16) || defined(ARM_TABLE_TWIDDLECOEF_F16_32)                                                                                                                                                                                                             
        case 16U:                                                                                  
            S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_16_f16;
            S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_16_f16;     
--- a/Source/TransformFunctions/arm_cfft_init_f32.c
+++ b/Source/TransformFunctions/arm_cfft_init_f32.c
@ -63,7 +63,8 @@ arm_status arm_cfft_radix4by2_rearrange_twiddles_f32(arm_cfft_instance_f32 *S, i
                                                                  
        switch (S->fftLen >> (twidCoefModifier - 1)) {  

-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_4096)
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
+            || defined(ARM_TABLE_TWIDDLECOEF_F32_4096)                                                                                                
        case 4096U:                                                                                
            S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_4096_f32;
            S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_4096_f32;     
@ -76,7 +77,8 @@ arm_status arm_cfft_radix4by2_rearrange_twiddles_f32(arm_cfft_instance_f32 *S, i
            break; 
 #endif                                  

-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_1024) || defined(ARM_TABLE_BITREVIDX_FXT_2048)                                                                                                   
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
+            || defined(ARM_TABLE_TWIDDLECOEF_F32_1024) || defined(ARM_TABLE_TWIDDLECOEF_F32_2048)                                                                                                  
        case 1024U:                                                                                
            S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_1024_f32;
            S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_1024_f32;     
@ -89,7 +91,8 @@ arm_status arm_cfft_radix4by2_rearrange_twiddles_f32(arm_cfft_instance_f32 *S, i
            break;                                                                                 
 #endif 

- #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_256) || defined(ARM_TABLE_BITREVIDX_FXT_512)                                                                                                  
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
+            || defined(ARM_TABLE_TWIDDLECOEF_F32_256) || defined(ARM_TABLE_TWIDDLECOEF_F32_512)                                                                                                 
        case 256U:                                                                                 
            S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_256_f32;
            S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_256_f32;     
@ -103,7 +106,8 @@ arm_status arm_cfft_radix4by2_rearrange_twiddles_f32(arm_cfft_instance_f32 *S, i
            break;                     
 #endif 

-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_64) || defined(ARM_TABLE_BITREVIDX_FXT_128)
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
+            || defined(ARM_TABLE_TWIDDLECOEF_F32_64) || defined(ARM_TABLE_TWIDDLECOEF_F32_128)
        case 64U:                                                                                  
            S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_64_f32;
            S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_64_f32;     
@ -116,7 +120,8 @@ arm_status arm_cfft_radix4by2_rearrange_twiddles_f32(arm_cfft_instance_f32 *S, i
            break;  
 #endif                                                                               
              
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREVIDX_FXT_16) || defined(ARM_TABLE_BITREVIDX_FXT_32)                                                                                                                                                                                                                
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
+            || defined(ARM_TABLE_TWIDDLECOEF_F32_16) || defined(ARM_TABLE_TWIDDLECOEF_F32_32)                                                                                                                                                                                                               
        case 16U:                                                                                  
            S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_16_f32;
            S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_16_f32;     
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@ -328,6 +328,7 @@ if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEO
 set(TESTSRC16 
  Source/Tests/BasicTestsF16.cpp
  Source/Tests/ComplexTestsF16.cpp
+  Source/Tests/FIRF16.cpp
  Source/Tests/TransformCF16.cpp
  Source/Tests/TransformRF16.cpp
  )
--- a/Testing/Include/Tests/FIRF16.h
+++ b/Testing/Include/Tests/FIRF16.h
@ -0,0 +1,26 @@
+#include "Test.h"
+#include "Pattern.h"
+
+#include "dsp/filtering_functions_f16.h"
+
+class FIRF16:public Client::Suite // Test suite (a Client::Suite) built around an arm_fir_instance_f16
+    {
+        public:
+            FIRF16(Testing::testID_t id); // Constructs the suite from a test ID
+            virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr); // Receives the test ID, the test parameters and the pattern manager; presumably run by the framework before a test (TODO confirm)
+            virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr); // Receives the test ID and the pattern manager; presumably run by the framework after a test (TODO confirm)
+        private:
+            #include "FIRF16_decl.h" // Textually injects additional private declarations here; presumably the generated test-method prototypes (TODO confirm)
+            
+            Client::Pattern<float16_t> coefs; // f16 pattern; presumably the filter coefficients (TODO confirm)
+            Client::Pattern<float16_t> inputs; // f16 pattern; presumably the input samples (TODO confirm)
+            Client::RefPattern<int16_t> configs; // int16 pattern; presumably the per-test configuration values (TODO confirm against the .cpp)
+            Client::LocalPattern<float16_t> output; // f16 local pattern; presumably receives the filter output (TODO confirm)
+            Client::LocalPattern<float16_t> state; // f16 local pattern; presumably backs the FIR state buffer (TODO confirm)
+            // Reference patterns are not loaded when we are in dump mode
+            Client::RefPattern<float16_t> ref; // f16 reference pattern (see the dump-mode note above)
+
+
+            arm_fir_instance_f16 S; // FIR instance structure (numTaps, pState, pCoeffs) declared in dsp/filtering_functions_f16.h
+
+    };
--- a/Testing/PatternGeneration/FIR.py
+++ b/Testing/PatternGeneration/FIR.py
@ -3,7 +3,7 @@ import numpy as np
 import itertools
 import Tools
 from scipy import signal
-from pylab import figure, clf, plot, xlabel, ylabel, xlim, ylim, title, grid, axes, show,semilogx, semilogy
+#from pylab import figure, clf, plot, xlabel, ylabel, xlim, ylim, title, grid, axes, show,semilogx, semilogy

 # Those patterns are used for tests and benchmarks.
 # For tests, there is the need to add tests for saturation
@ -57,7 +57,7 @@ def writeTests(config,format):
    if format == 0 or format == 31:
       blk = [1, 2, 3, 8, 9,10,11, 16, 23]
       taps = [1, 2, 3, 4, 5, 6, 7, 8, 11, 16, 23, 25]
-    elif format == 15:
+    elif format == 15 or format == 16:
       blk = [1, 2, 3, 12,13,14,15]
       taps = [2, 3, 4, 5, 6, 7, 8, 11, 25]
    elif format == 7:
@ -99,6 +99,7 @@ def generatePatterns():
    PARAMDIR = os.path.join("Parameters","DSP","Filtering","FIR","FIR")
    
    configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32")
+    configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16")
    configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31")
    configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15")
    configq7=Tools.Config(PATTERNDIR,PARAMDIR,"q7")
@ -106,6 +107,7 @@ def generatePatterns():
    
    
    writeTests(configf32,0)
+    writeTests(configf16,16)
    writeTests(configq31,31)
    writeTests(configq15,15)
    writeTests(configq7,7)
--- a/Testing/Patterns/DSP/Filtering/FIR/FIRF16/Coefs1_f16.txt
+++ b/Testing/Patterns/DSP/Filtering/FIR/FIRF16/Coefs1_f16.txt
@ -0,0 +1,130 @@
+H
+64
+// 0.330498
+0x354a
+// -0.216810
+0xb2f0
+// -0.528238
+0xb83a
+// 0.405195
+0x367c
+// -0.083506
+0xad58
+// 0.000449
+0xf59
+// -0.464291
+0xb76e
+// -0.019857
+0xa515
+// -0.169947
+0xb170
+// -0.376044
+0xb604
+// 0.090843
+0x2dd0
+// -0.221458
+0xb316
+// 0.084262
+0x2d65
+// -0.138296
+0xb06d
+// 0.059415
+0x2b9b
+// 0.198837
+0x325d
+// -0.063674
+0xac13
+// 0.214492
+0x32dd
+// -0.005482
+0x9d9d
+// 0.359475
+0x35c0
+// -0.569871
+0xb88f
+// 0.278625
+0x3475
+// 0.432925
+0x36ed
+// 0.453892
+0x3743
+// -0.437651
+0xb701
+// 0.021434
+0x257d
+// -0.654628
+0xb93d
+// 0.416193
+0x36a9
+// 0.317476
+0x3514
+// 0.582392
+0x38a9
+// -0.319969
+0xb51f
+// -0.717655
+0xb9be
+// -0.137898
+0xb06a
+// -0.513659
+0xb81c
+// -0.127180
+0xb012
+// 0.091017
+0x2dd3
+// -0.389085
+0xb63a
+// 0.281055
+0x347f
+// 0.036319
+0x28a6
+// -0.454822
+0xb747
+// -0.097364
+0xae3b
+// -0.018346
+0xa4b2
+// -0.132280
+0xb03c
+// -0.019581
+0xa503
+// -0.053694
+0xaadf
+// -1.000000
+0xbc00
+// 0.072400
+0x2ca2
+// -0.344655
+0xb584
+// -0.215652
+0xb2e7
+// 0.648358
+0x3930
+// -0.058424
+0xab7a
+// -0.373601
+0xb5fa
+// -0.612667
+0xb8e7
+// -0.230776
+0xb363
+// -0.874476
+0xbaff
+// -0.135680
+0xb057
+// 0.435408
+0x36f7
+// 0.882500
+0x3b0f
+// 0.276670
+0x346d
+// -0.165710
+0xb14d
+// 0.532691
+0x3843
+// -0.246518
+0xb3e3
+// -0.192607
+0xb22a
+// -0.966899
+0xbbbc
--- a/Testing/Patterns/DSP/Filtering/FIR/FIRF16/FirCoefs1_f16.txt
+++ b/Testing/Patterns/DSP/Filtering/FIR/FIRF16/FirCoefs1_f16.txt
@ -0,0 +1,996 @@
+H
+497
+// 0.500000
+0x3800
+// 0.250000
+0x3400
+// 0.600000
+0x38cd
+// 0.400000
+0x3666
+// 0.200000
+0x3266
+// 0.666667
+0x3955
+// 0.500000
+0x3800
+// 0.333333
+0x3555
+// 0.166667
+0x3155
+// 0.714286
+0x39b7
+// 0.571429
+0x3892
+// 0.428571
+0x36db
+// 0.285714
+0x3492
+// 0.142857
+0x3092
+// 0.750000
+0x3a00
+// 0.625000
+0x3900
+// 0.500000
+0x3800
+// 0.375000
+0x3600
+// 0.250000
+0x3400
+// 0.125000
+0x3000
+// 0.777778
+0x3a39
+// 0.666667
+0x3955
+// 0.555556
+0x3872
+// 0.444444
+0x371c
+// 0.333333
+0x3555
+// 0.222222
+0x331c
+// 0.111111
+0x2f1c
+// 0.800000
+0x3a66
+// 0.700000
+0x399a
+// 0.600000
+0x38cd
+// 0.500000
+0x3800
+// 0.400000
+0x3666
+// 0.300000
+0x34cd
+// 0.200000
+0x3266
+// 0.100000
+0x2e66
+// 0.846154
+0x3ac5
+// 0.769231
+0x3a27
+// 0.692308
+0x398a
+// 0.615385
+0x38ec
+// 0.538462
+0x384f
+// 0.461538
+0x3762
+// 0.384615
+0x3627
+// 0.307692
+0x34ec
+// 0.230769
+0x3362
+// 0.153846
+0x30ec
+// 0.076923
+0x2cec
+// 0.925926
+0x3b68
+// 0.888889
+0x3b1c
+// 0.851852
+0x3ad1
+// 0.814815
+0x3a85
+// 0.777778
+0x3a39
+// 0.740741
+0x39ed
+// 0.703704
+0x39a1
+// 0.666667
+0x3955
+// 0.629630
+0x3909
+// 0.592593
+0x38be
+// 0.555556
+0x3872
+// 0.518519
+0x3826
+// 0.481481
+0x37b4
+// 0.444444
+0x371c
+// 0.407407
+0x3685
+// 0.370370
+0x35ed
+// 0.333333
+0x3555
+// 0.296296
+0x34be
+// 0.259259
+0x3426
+// 0.222222
+0x331c
+// 0.185185
+0x31ed
+// 0.148148
+0x30be
+// 0.111111
+0x2f1c
+// 0.074074
+0x2cbe
+// 0.037037
+0x28be
+// 0.500000
+0x3800
+// 0.250000
+0x3400
+// 0.600000
+0x38cd
+// 0.400000
+0x3666
+// 0.200000
+0x3266
+// 0.666667
+0x3955
+// 0.500000
+0x3800
+// 0.333333
+0x3555
+// 0.166667
+0x3155
+// 0.714286
+0x39b7
+// 0.571429
+0x3892
+// 0.428571
+0x36db
+// 0.285714
+0x3492
+// 0.142857
+0x3092
+// 0.750000
+0x3a00
+// 0.625000
+0x3900
+// 0.500000
+0x3800
+// 0.375000
+0x3600
+// 0.250000
+0x3400
+// 0.125000
+0x3000
+// 0.777778
+0x3a39
+// 0.666667
+0x3955
+// 0.555556
+0x3872
+// 0.444444
+0x371c
+// 0.333333
+0x3555
+// 0.222222
+0x331c
+// 0.111111
+0x2f1c
+// 0.800000
+0x3a66
+// 0.700000
+0x399a
+// 0.600000
+0x38cd
+// 0.500000
+0x3800
+// 0.400000
+0x3666
+// 0.300000
+0x34cd
+// 0.200000
+0x3266
+// 0.100000
+0x2e66
+// 0.846154
+0x3ac5
+// 0.769231
+0x3a27
+// 0.692308
+0x398a
+// 0.615385
+0x38ec
+// 0.538462
+0x384f
+// 0.461538
+0x3762
+// 0.384615
+0x3627
+// 0.307692
+0x34ec
+// 0.230769
+0x3362
+// 0.153846
+0x30ec
+// 0.076923
+0x2cec
+// 0.925926
+0x3b68
+// 0.888889
+0x3b1c
+// 0.851852
+0x3ad1
+// 0.814815
+0x3a85
+// 0.777778
+0x3a39
+// 0.740741
+0x39ed
+// 0.703704
+0x39a1
+// 0.666667
+0x3955
+// 0.629630
+0x3909
+// 0.592593
+0x38be
+// 0.555556
+0x3872
+// 0.518519
+0x3826
+// 0.481481
+0x37b4
+// 0.444444
+0x371c
+// 0.407407
+0x3685
+// 0.370370
+0x35ed
+// 0.333333
+0x3555
+// 0.296296
+0x34be
+// 0.259259
+0x3426
+// 0.222222
+0x331c
+// 0.185185
+0x31ed
+// 0.148148
+0x30be
+// 0.111111
+0x2f1c
+// 0.074074
+0x2cbe
+// 0.037037
+0x28be
+// 0.500000
+0x3800
+// 0.250000
+0x3400
+// 0.600000
+0x38cd
+// 0.400000
+0x3666
+// 0.200000
+0x3266
+// 0.666667
+0x3955
+// 0.500000
+0x3800
+// 0.333333
+0x3555
+// 0.166667
+0x3155
+// 0.714286
+0x39b7
+// 0.571429
+0x3892
+// 0.428571
+0x36db
+// 0.285714
+0x3492
+// 0.142857
+0x3092
+// 0.750000
+0x3a00
+// 0.625000
+0x3900
+// 0.500000
+0x3800
+// 0.375000
+0x3600
+// 0.250000
+0x3400
+// 0.125000
+0x3000
+// 0.777778
+0x3a39
+// 0.666667
+0x3955
+// 0.555556
+0x3872
+// 0.444444
+0x371c
+// 0.333333
+0x3555
+// 0.222222
+0x331c
+// 0.111111
+0x2f1c
+// 0.800000
+0x3a66
+// 0.700000
+0x399a
+// 0.600000
+0x38cd
+// 0.500000
+0x3800
+// 0.400000
+0x3666
+// 0.300000
+0x34cd
+// 0.200000
+0x3266
+// 0.100000
+0x2e66
+// 0.846154
+0x3ac5
+// 0.769231
+0x3a27
+// 0.692308
+0x398a
+// 0.615385
+0x38ec
+// 0.538462
+0x384f
+// 0.461538
+0x3762
+// 0.384615
+0x3627
+// 0.307692
+0x34ec
+// 0.230769
+0x3362
+// 0.153846
+0x30ec
+// 0.076923
+0x2cec
+// 0.925926
+0x3b68
+// 0.888889
+0x3b1c
+// 0.851852
+0x3ad1
+// 0.814815
+0x3a85
+// 0.777778
+0x3a39
+// 0.740741
+0x39ed
+// 0.703704
+0x39a1
+// 0.666667
+0x3955
+// 0.629630
+0x3909
+// 0.592593
+0x38be
+// 0.555556
+0x3872
+// 0.518519
+0x3826
+// 0.481481
+0x37b4
+// 0.444444
+0x371c
+// 0.407407
+0x3685
+// 0.370370
+0x35ed
+// 0.333333
+0x3555
+// 0.296296
+0x34be
+// 0.259259
+0x3426
+// 0.222222
+0x331c
+// 0.185185
+0x31ed
+// 0.148148
+0x30be
+// 0.111111
+0x2f1c
+// 0.074074
+0x2cbe
+// 0.037037
+0x28be
+// 0.500000
+0x3800
+// 0.250000
+0x3400
+// 0.600000
+0x38cd
+// 0.400000
+0x3666
+// 0.200000
+0x3266
+// 0.666667
+0x3955
+// 0.500000
+0x3800
+// 0.333333
+0x3555
+// 0.166667
+0x3155
+// 0.714286
+0x39b7
+// 0.571429
+0x3892
+// 0.428571
+0x36db
+// 0.285714
+0x3492
+// 0.142857
+0x3092
+// 0.750000
+0x3a00
+// 0.625000
+0x3900
+// 0.500000
+0x3800
+// 0.375000
+0x3600
+// 0.250000
+0x3400
+// 0.125000
+0x3000
+// 0.777778
+0x3a39
+// 0.666667
+0x3955
+// 0.555556
+0x3872
+// 0.444444
+0x371c
+// 0.333333
+0x3555
+// 0.222222
+0x331c
+// 0.111111
+0x2f1c
+// 0.800000
+0x3a66
+// 0.700000
+0x399a
+// 0.600000
+0x38cd
+// 0.500000
+0x3800
+// 0.400000
+0x3666
+// 0.300000
+0x34cd
+// 0.200000
+0x3266
+// 0.100000
+0x2e66
+// 0.846154
+0x3ac5
+// 0.769231
+0x3a27
+// 0.692308
+0x398a
+// 0.615385
+0x38ec
+// 0.538462
+0x384f
+// 0.461538
+0x3762
+// 0.384615
+0x3627
+// 0.307692
+0x34ec
+// 0.230769
+0x3362
+// 0.153846
+0x30ec
+// 0.076923
+0x2cec
+// 0.925926
+0x3b68
+// 0.888889
+0x3b1c
+// 0.851852
+0x3ad1
+// 0.814815
+0x3a85
+// 0.777778
+0x3a39
+// 0.740741
+0x39ed
+// 0.703704
+0x39a1
+// 0.666667
+0x3955
+// 0.629630
+0x3909
+// 0.592593
+0x38be
+// 0.555556
+0x3872
+// 0.518519
+0x3826
+// 0.481481
+0x37b4
+// 0.444444
+0x371c
+// 0.407407
+0x3685
+// 0.370370
+0x35ed
+// 0.333333
+0x3555
+// 0.296296
+0x34be
+// 0.259259
+0x3426
+// 0.222222
+0x331c
+// 0.185185
+0x31ed
+// 0.148148
+0x30be
+// 0.111111
+0x2f1c
+// 0.074074
+0x2cbe
+// 0.037037
+0x28be
+// 0.500000
+0x3800
+// 0.250000
+0x3400
+// 0.600000
+0x38cd
+// 0.400000
+0x3666
+// 0.200000
+0x3266
+// 0.666667
+0x3955
+// 0.500000
+0x3800
+// 0.333333
+0x3555
+// 0.166667
+0x3155
+// 0.714286
+0x39b7
+// 0.571429
+0x3892
+// 0.428571
+0x36db
+// 0.285714
+0x3492
+// 0.142857
+0x3092
+// 0.750000
+0x3a00
+// 0.625000
+0x3900
+// 0.500000
+0x3800
+// 0.375000
+0x3600
+// 0.250000
+0x3400
+// 0.125000
+0x3000
+// 0.777778
+0x3a39
+// 0.666667
+0x3955
+// 0.555556
+0x3872
+// 0.444444
+0x371c
+// 0.333333
+0x3555
+// 0.222222
+0x331c
+// 0.111111
+0x2f1c
+// 0.800000
+0x3a66
+// 0.700000
+0x399a
+// 0.600000
+0x38cd
+// 0.500000
+0x3800
+// 0.400000
+0x3666
+// 0.300000
+0x34cd
+// 0.200000
+0x3266
+// 0.100000
+0x2e66
+// 0.846154
+0x3ac5
+// 0.769231
+0x3a27
+// 0.692308
+0x398a
+// 0.615385
+0x38ec
+// 0.538462
+0x384f
+// 0.461538
+0x3762
+// 0.384615
+0x3627
+// 0.307692
+0x34ec
+// 0.230769
+0x3362
+// 0.153846
+0x30ec
+// 0.076923
+0x2cec
+// 0.925926
+0x3b68
+// 0.888889
+0x3b1c
+// 0.851852
+0x3ad1
+// 0.814815
+0x3a85
+// 0.777778
+0x3a39
+// 0.740741
+0x39ed
+// 0.703704
+0x39a1
+// 0.666667
+0x3955
+// 0.629630
+0x3909
+// 0.592593
+0x38be
+// 0.555556
+0x3872
+// 0.518519
+0x3826
+// 0.481481
+0x37b4
+// 0.444444
+0x371c
+// 0.407407
+0x3685
+// 0.370370
+0x35ed
+// 0.333333
+0x3555
+// 0.296296
+0x34be
+// 0.259259
+0x3426
+// 0.222222
+0x331c
+// 0.185185
+0x31ed
+// 0.148148
+0x30be
+// 0.111111
+0x2f1c
+// 0.074074
+0x2cbe
+// 0.037037
+0x28be
+// 0.500000
+0x3800
+// 0.250000
+0x3400
+// 0.600000
+0x38cd
+// 0.400000
+0x3666
+// 0.200000
+0x3266
+// 0.666667
+0x3955
+// 0.500000
+0x3800
+// 0.333333
+0x3555
+// 0.166667
+0x3155
+// 0.714286
+0x39b7
+// 0.571429
+0x3892
+// 0.428571
+0x36db
+// 0.285714
+0x3492
+// 0.142857
+0x3092
+// 0.750000
+0x3a00
+// 0.625000
+0x3900
+// 0.500000
+0x3800
+// 0.375000
+0x3600
+// 0.250000
+0x3400
+// 0.125000
+0x3000
+// 0.777778
+0x3a39
+// 0.666667
+0x3955
+// 0.555556
+0x3872
+// 0.444444
+0x371c
+// 0.333333
+0x3555
+// 0.222222
+0x331c
+// 0.111111
+0x2f1c
+// 0.800000
+0x3a66
+// 0.700000
+0x399a
+// 0.600000
+0x38cd
+// 0.500000
+0x3800
+// 0.400000
+0x3666
+// 0.300000
+0x34cd
+// 0.200000
+0x3266
+// 0.100000
+0x2e66
+// 0.846154
+0x3ac5
+// 0.769231
+0x3a27
+// 0.692308
+0x398a
+// 0.615385
+0x38ec
+// 0.538462
+0x384f
+// 0.461538
+0x3762
+// 0.384615
+0x3627
+// 0.307692
+0x34ec
+// 0.230769
+0x3362
+// 0.153846
+0x30ec
+// 0.076923
+0x2cec
+// 0.925926
+0x3b68
+// 0.888889
+0x3b1c
+// 0.851852
+0x3ad1
+// 0.814815
+0x3a85
+// 0.777778
+0x3a39
+// 0.740741
+0x39ed
+// 0.703704
+0x39a1
+// 0.666667
+0x3955
+// 0.629630
+0x3909
+// 0.592593
+0x38be
+// 0.555556
+0x3872
+// 0.518519
+0x3826
+// 0.481481
+0x37b4
+// 0.444444
+0x371c
+// 0.407407
+0x3685
+// 0.370370
+0x35ed
+// 0.333333
+0x3555
+// 0.296296
+0x34be
+// 0.259259
+0x3426
+// 0.222222
+0x331c
+// 0.185185
+0x31ed
+// 0.148148
+0x30be
+// 0.111111
+0x2f1c
+// 0.074074
+0x2cbe
+// 0.037037
+0x28be
+// 0.500000
+0x3800
+// 0.250000
+0x3400
+// 0.600000
+0x38cd
+// 0.400000
+0x3666
+// 0.200000
+0x3266
+// 0.666667
+0x3955
+// 0.500000
+0x3800
+// 0.333333
+0x3555
+// 0.166667
+0x3155
+// 0.714286
+0x39b7
+// 0.571429
+0x3892
+// 0.428571
+0x36db
+// 0.285714
+0x3492
+// 0.142857
+0x3092
+// 0.750000
+0x3a00
+// 0.625000
+0x3900
+// 0.500000
+0x3800
+// 0.375000
+0x3600
+// 0.250000
+0x3400
+// 0.125000
+0x3000
+// 0.777778
+0x3a39
+// 0.666667
+0x3955
+// 0.555556
+0x3872
+// 0.444444
+0x371c
+// 0.333333
+0x3555
+// 0.222222
+0x331c
+// 0.111111
+0x2f1c
+// 0.800000
+0x3a66
+// 0.700000
+0x399a
+// 0.600000
+0x38cd
+// 0.500000
+0x3800
+// 0.400000
+0x3666
+// 0.300000
+0x34cd
+// 0.200000
+0x3266
+// 0.100000
+0x2e66
+// 0.846154
+0x3ac5
+// 0.769231
+0x3a27
+// 0.692308
+0x398a
+// 0.615385
+0x38ec
+// 0.538462
+0x384f
+// 0.461538
+0x3762
+// 0.384615
+0x3627
+// 0.307692
+0x34ec
+// 0.230769
+0x3362
+// 0.153846
+0x30ec
+// 0.076923
+0x2cec
+// 0.925926
+0x3b68
+// 0.888889
+0x3b1c
+// 0.851852
+0x3ad1
+// 0.814815
+0x3a85
+// 0.777778
+0x3a39
+// 0.740741
+0x39ed
+// 0.703704
+0x39a1
+// 0.666667
+0x3955
+// 0.629630
+0x3909
+// 0.592593
+0x38be
+// 0.555556
+0x3872
+// 0.518519
+0x3826
+// 0.481481
+0x37b4
+// 0.444444
+0x371c
+// 0.407407
+0x3685
+// 0.370370
+0x35ed
+// 0.333333
+0x3555
+// 0.296296
+0x34be
+// 0.259259
+0x3426
+// 0.222222
+0x331c
+// 0.185185
+0x31ed
+// 0.148148
+0x30be
+// 0.111111
+0x2f1c
+// 0.074074
+0x2cbe
+// 0.037037
+0x28be
--- a/Testing/Patterns/DSP/Filtering/FIR/FIRF16/FirConfigs1_s16.txt
+++ b/Testing/Patterns/DSP/Filtering/FIR/FIRF16/FirConfigs1_s16.txt
@ -0,0 +1,254 @@
+H
+126
+// 1
+0x0001
+// 2
+0x0002
+// 1
+0x0001
+// 3
+0x0003
+// 1
+0x0001
+// 4
+0x0004
+// 1
+0x0001
+// 5
+0x0005
+// 1
+0x0001
+// 6
+0x0006
+// 1
+0x0001
+// 7
+0x0007
+// 1
+0x0001
+// 8
+0x0008
+// 1
+0x0001
+// 11
+0x000B
+// 1
+0x0001
+// 25
+0x0019
+// 2
+0x0002
+// 2
+0x0002
+// 2
+0x0002
+// 3
+0x0003
+// 2
+0x0002
+// 4
+0x0004
+// 2
+0x0002
+// 5
+0x0005
+// 2
+0x0002
+// 6
+0x0006
+// 2
+0x0002
+// 7
+0x0007
+// 2
+0x0002
+// 8
+0x0008
+// 2
+0x0002
+// 11
+0x000B
+// 2
+0x0002
+// 25
+0x0019
+// 3
+0x0003
+// 2
+0x0002
+// 3
+0x0003
+// 3
+0x0003
+// 3
+0x0003
+// 4
+0x0004
+// 3
+0x0003
+// 5
+0x0005
+// 3
+0x0003
+// 6
+0x0006
+// 3
+0x0003
+// 7
+0x0007
+// 3
+0x0003
+// 8
+0x0008
+// 3
+0x0003
+// 11
+0x000B
+// 3
+0x0003
+// 25
+0x0019
+// 12
+0x000C
+// 2
+0x0002
+// 12
+0x000C
+// 3
+0x0003
+// 12
+0x000C
+// 4
+0x0004
+// 12
+0x000C
+// 5
+0x0005
+// 12
+0x000C
+// 6
+0x0006
+// 12
+0x000C
+// 7
+0x0007
+// 12
+0x000C
+// 8
+0x0008
+// 12
+0x000C
+// 11
+0x000B
+// 12
+0x000C
+// 25
+0x0019
+// 13
+0x000D
+// 2
+0x0002
+// 13
+0x000D
+// 3
+0x0003
+// 13
+0x000D
+// 4
+0x0004
+// 13
+0x000D
+// 5
+0x0005
+// 13
+0x000D
+// 6
+0x0006
+// 13
+0x000D
+// 7
+0x0007
+// 13
+0x000D
+// 8
+0x0008
+// 13
+0x000D
+// 11
+0x000B
+// 13
+0x000D
+// 25
+0x0019
+// 14
+0x000E
+// 2
+0x0002
+// 14
+0x000E
+// 3
+0x0003
+// 14
+0x000E
+// 4
+0x0004
+// 14
+0x000E
+// 5
+0x0005
+// 14
+0x000E
+// 6
+0x0006
+// 14
+0x000E
+// 7
+0x0007
+// 14
+0x000E
+// 8
+0x0008
+// 14
+0x000E
+// 11
+0x000B
+// 14
+0x000E
+// 25
+0x0019
+// 15
+0x000F
+// 2
+0x0002
+// 15
+0x000F
+// 3
+0x0003
+// 15
+0x000F
+// 4
+0x0004
+// 15
+0x000F
+// 5
+0x0005
+// 15
+0x000F
+// 6
+0x0006
+// 15
+0x000F
+// 7
+0x0007
+// 15
+0x000F
+// 8
+0x0008
+// 15
+0x000F
+// 11
+0x000B
+// 15
+0x000F
+// 25
+0x0019
--- a/Testing/Patterns/DSP/Filtering/FIR/FIRF16/FirInput1_f16.txt
+++ b/Testing/Patterns/DSP/Filtering/FIR/FIRF16/FirInput1_f16.txt
@ -0,0 +1,94 @@
+H
+46
+// -0.000700
+0x91bd
+// 0.021809
+0x2595
+// 0.027493
+0x270a
+// 0.026437
+0x26c5
+// 0.007935
+0x2010
+// -0.007331
+0x9f82
+// -0.024370
+0xa63d
+// -0.030877
+0xa7e8
+// -0.018988
+0xa4dc
+// -0.004488
+0x9c98
+// 0.015286
+0x23d4
+// 0.030232
+0x27bd
+// 0.028046
+0x272e
+// 0.009504
+0x20de
+// -0.008154
+0xa02d
+// -0.023052
+0xa5e7
+// -0.029021
+0xa76e
+// -0.023819
+0xa619
+// 0.000666
+0x1174
+// 0.018105
+0x24a3
+// 0.027556
+0x270e
+// 0.024663
+0x2650
+// 0.009321
+0x20c6
+// -0.006156
+0x9e4e
+// -0.025390
+0xa680
+// -0.029759
+0xa79e
+// -0.014619
+0xa37c
+// -0.001256
+0x9525
+// 0.022857
+0x25da
+// 0.030488
+0x27ce
+// 0.028626
+0x2754
+// 0.012083
+0x2230
+// -0.010159
+0xa134
+// -0.024086
+0xa62a
+// -0.033333
+0xa844
+// -0.020435
+0xa53b
+// -0.001083
+0x946f
+// 0.019899
+0x2518
+// 0.028454
+0x2749
+// 0.026614
+0x26d0
+// 0.006058
+0x1e34
+// -0.011197
+0xa1bc
+// -0.025509
+0xa688
+// -0.031663
+0xa80e
+// -0.021418
+0xa57c
+// 0.004721
+0x1cd6
--- a/Testing/Patterns/DSP/Filtering/FIR/FIRF16/FirRefs1_f16.txt
+++ b/Testing/Patterns/DSP/Filtering/FIR/FIRF16/FirRefs1_f16.txt
--- a/Testing/Patterns/DSP/Filtering/FIR/FIRF16/Refs1_f16.txt
+++ b/Testing/Patterns/DSP/Filtering/FIR/FIRF16/Refs1_f16.txt
@ -0,0 +1,514 @@
+H
+256
+// 0.383858
+0x3624
+// -0.360281
+0xb5c4
+// -0.108248
+0xaeee
+// -0.356475
+0xb5b4
+// 0.093465
+0x2dfb
+// -0.410823
+0xb693
+// -0.397555
+0xb65c
+// 0.760141
+0x3a15
+// -0.311272
+0xb4fb
+// 0.232246
+0x336f
+// -0.794597
+0xba5b
+// 0.338903
+0x356c
+// 0.571877
+0x3893
+// -0.367889
+0xb5e3
+// -0.026122
+0xa6b0
+// 0.036031
+0x289d
+// -0.168775
+0xb167
+// 0.001169
+0x14ca
+// 0.631633
+0x390e
+// 0.067260
+0x2c4e
+// 0.214385
+0x32dc
+// -0.219711
+0xb308
+// -0.196264
+0xb248
+// 0.251620
+0x3407
+// 0.087506
+0x2d9a
+// 0.043158
+0x2986
+// 0.357896
+0x35ba
+// -0.233053
+0xb375
+// 0.652629
+0x3939
+// 0.319134
+0x351b
+// 0.231096
+0x3365
+// 1.000000
+0x3c00
+// 0.103619
+0x2ea2
+// -0.053190
+0xaacf
+// -0.030475
+0xa7cd
+// 0.506094
+0x380c
+// -0.085849
+0xad7f
+// 0.996009
+0x3bf8
+// -0.257541
+0xb41f
+// -0.277745
+0xb472
+// 0.289254
+0x34a1
+// -0.240127
+0xb3af
+// 0.302976
+0x34d9
+// -0.059987
+0xabae
+// 0.326989
+0x353b
+// -0.299167
+0xb4c9
+// 0.063774
+0x2c15
+// -0.137762
+0xb069
+// 0.188525
+0x3208
+// -0.766262
+0xba21
+// -0.096278
+0xae29
+// 0.188865
+0x320b
+// -0.633632
+0xb912
+// -0.283286
+0xb488
+// -0.180676
+0xb1c8
+// 0.290336
+0x34a5
+// 0.059201
+0x2b94
+// 0.392815
+0x3649
+// 0.291631
+0x34ab
+// 0.139361
+0x3076
+// 0.015437
+0x23e7
+// -0.715859
+0xb9ba
+// -0.228692
+0xb351
+// 0.068302
+0x2c5f
+// -0.526484
+0xb836
+// 0.651278
+0x3936
+// 0.329696
+0x3546
+// 0.505235
+0x380b
+// -0.335248
+0xb55d
+// 0.072160
+0x2c9e
+// -0.303078
+0xb4d9
+// -0.104510
+0xaeb0
+// 0.233797
+0x337b
+// -0.489524
+0xb7d5
+// 0.139492
+0x3077
+// 0.109968
+0x2f0a
+// -0.176948
+0xb1aa
+// 0.482679
+0x37b9
+// 0.129985
+0x3029
+// 0.211463
+0x32c4
+// -0.037170
+0xa8c2
+// -0.375432
+0xb602
+// 0.063445
+0x2c0f
+// 0.129749
+0x3027
+// -0.250616
+0xb403
+// 0.408141
+0x3688
+// 0.520295
+0x382a
+// 0.285543
+0x3492
+// 0.045666
+0x29d8
+// -0.157962
+0xb10e
+// -0.034793
+0xa874
+// -0.235224
+0xb387
+// 0.271029
+0x3456
+// 0.249988
+0x3400
+// 0.109455
+0x2f01
+// 0.359216
+0x35bf
+// -0.034223
+0xa861
+// -0.613673
+0xb8e9
+// 0.088313
+0x2da7
+// -0.205604
+0xb294
+// 0.428466
+0x36db
+// 0.379569
+0x3613
+// -0.299336
+0xb4ca
+// 0.458983
+0x3758
+// 0.651217
+0x3936
+// -0.270324
+0xb453
+// 0.190474
+0x3218
+// 0.556901
+0x3875
+// -0.782642
+0xba43
+// -0.121448
+0xafc6
+// -0.212072
+0xb2c9
+// -0.600759
+0xb8ce
+// -0.131040
+0xb031
+// 0.589764
+0x38b8
+// 0.041692
+0x2956
+// 0.419055
+0x36b4
+// 0.100536
+0x2e6f
+// -0.183686
+0xb1e1
+// -0.339428
+0xb56e
+// 0.521166
+0x382b
+// -0.388635
+0xb638
+// 0.143214
+0x3095
+// 0.598850
+0x38ca
+// 0.002518
+0x1928
+// -0.034847
+0xa876
+// 0.238035
+0x339e
+// -0.177845
+0xb1b1
+// -0.322159
+0xb528
+// 0.140149
+0x307c
+// 0.329944
+0x3547
+// -0.107482
+0xaee1
+// -0.188600
+0xb209
+// -0.275420
+0xb468
+// 0.068589
+0x2c64
+// 0.221843
+0x3319
+// 0.367855
+0x35e3
+// -0.213981
+0xb2d9
+// 0.029023
+0x276e
+// -0.238657
+0xb3a3
+// -0.251368
+0xb406
+// 0.014687
+0x2385
+// -0.592525
+0xb8bd
+// -0.043726
+0xa999
+// -0.324220
+0xb530
+// 0.502311
+0x3805
+// 0.354737
+0x35ad
+// -0.292617
+0xb4af
+// -0.118101
+0xaf8f
+// 0.206575
+0x329c
+// -0.050570
+0xaa79
+// 0.202377
+0x327a
+// 0.173775
+0x3190
+// 0.306190
+0x34e6
+// -0.363015
+0xb5cf
+// -0.222011
+0xb31b
+// 0.047552
+0x2a16
+// 0.223897
+0x332a
+// -0.141249
+0xb085
+// -0.227664
+0xb349
+// 0.641983
+0x3923
+// 0.047110
+0x2a08
+// -0.190337
+0xb217
+// 0.605096
+0x38d7
+// 0.525760
+0x3835
+// -0.032498
+0xa829
+// -0.189156
+0xb20e
+// -0.229228
+0xb356
+// 0.698481
+0x3996
+// -0.017210
+0xa468
+// -0.408740
+0xb68a
+// 0.375436
+0x3602
+// -0.305202
+0xb4e2
+// -0.055640
+0xab1f
+// 0.213179
+0x32d2
+// 0.344974
+0x3585
+// 0.389034
+0x3639
+// -0.385519
+0xb62b
+// -0.605079
+0xb8d7
+// -0.402558
+0xb671
+// 0.079455
+0x2d16
+// 0.230710
+0x3362
+// -0.869931
+0xbaf6
+// 0.533997
+0x3846
+// -0.278426
+0xb474
+// -0.246206
+0xb3e1
+// 0.536629
+0x384b
+// 0.260969
+0x342d
+// 0.422810
+0x36c4
+// -0.358079
+0xb5bb
+// -0.201306
+0xb271
+// -0.137197
+0xb064
+// 0.106299
+0x2ece
+// 0.405284
+0x367c
+// -0.395406
+0xb654
+// -0.270691
+0xb455
+// 0.077241
+0x2cf2
+// -0.108960
+0xaef9
+// -0.475029
+0xb79a
+// 0.235909
+0x338d
+// 0.240788
+0x33b5
+// 0.118769
+0x2f9a
+// 0.885914
+0x3b16
+// -0.309430
+0xb4f3
+// 0.024907
+0x2660
+// 0.805678
+0x3a72
+// -0.223015
+0xb323
+// -0.274012
+0xb462
+// 0.560700
+0x387c
+// 0.253083
+0x340d
+// -0.208701
+0xb2ae
+// 0.793855
+0x3a5a
+// 0.092311
+0x2de8
+// 0.000585
+0x10cb
+// 0.113034
+0x2f3c
+// 0.385109
+0x3629
+// -0.138169
+0xb06c
+// -0.037009
+0xa8bd
+// -0.762359
+0xba19
+// 0.518755
+0x3826
+// -0.285450
+0xb491
+// 0.195220
+0x323f
+// 0.043038
+0x2982
+// 0.092252
+0x2de7
+// 0.592847
+0x38be
+// -0.179904
+0xb1c2
+// 0.316811
+0x3512
+// 0.756446
+0x3a0d
+// -0.121211
+0xafc2
+// -0.124608
+0xaffa
+// -0.101613
+0xae81
+// 0.001223
+0x1503
+// 0.517774
+0x3824
+// -0.512054
+0xb819
+// 0.138212
+0x306c
+// -0.518981
+0xb827
+// 0.048176
+0x2a2b
+// -0.119868
+0xafac
+// 0.246059
+0x33e0
+// 0.233722
+0x337b
+// -0.217484
+0xb2f6
+// -0.210270
+0xb2bb
+// 0.151679
+0x30db
+// 0.303623
+0x34dc
+// 0.318946
+0x351a
+// 0.219740
+0x3308
+// 0.335878
+0x3560
+// 0.158669
+0x3114
+// -0.202058
+0xb277
+// -0.591399
+0xb8bb
+// -0.241789
+0xb3bd
+// 0.118708
+0x2f99
+// 0.025184
+0x2672
+// 0.495268
+0x37ed
+// 0.283356
+0x3489
+// -0.354557
+0xb5ac
+// 0.525900
+0x3835
--- a/Testing/Patterns/DSP/Filtering/FIR/FIRF16/Samples1_f16.txt
+++ b/Testing/Patterns/DSP/Filtering/FIR/FIRF16/Samples1_f16.txt
@ -0,0 +1,514 @@
+H
+256
+// 0.249984
+0x3400
+// -0.183097
+0xb1dc
+// -0.344601
+0xb583
+// 0.098066
+0x2e47
+// -0.400134
+0xb667
+// 0.319700
+0x351d
+// -0.329309
+0xb545
+// 0.403234
+0x3674
+// 0.190989
+0x321d
+// 0.182858
+0x31da
+// -0.332209
+0xb551
+// -0.034188
+0xa860
+// -0.261380
+0xb42f
+// -0.123044
+0xafe0
+// 0.156372
+0x3101
+// 0.122502
+0x2fd7
+// -0.345881
+0xb589
+// -0.451564
+0xb73a
+// -0.797421
+0xba61
+// 0.159252
+0x3119
+// -0.113934
+0xaf4b
+// -0.177704
+0xb1b0
+// 0.132971
+0x3041
+// -0.175539
+0xb19e
+// -0.115232
+0xaf60
+// -0.634750
+0xb914
+// -0.451905
+0xb73b
+// 0.027005
+0x26ea
+// -0.319930
+0xb51e
+// 0.158835
+0x3115
+// 0.428410
+0x36db
+// 0.028448
+0x2748
+// -0.047753
+0xaa1d
+// 0.383866
+0x3624
+// -0.040763
+0xa938
+// -0.078810
+0xad0b
+// 0.338680
+0x356b
+// 0.448964
+0x372f
+// -0.469767
+0xb784
+// -0.226947
+0xb343
+// -0.178926
+0xb1ba
+// -0.119506
+0xafa6
+// -0.254654
+0xb413
+// -0.627118
+0xb904
+// 0.169415
+0x316c
+// 0.034287
+0x2864
+// 0.302811
+0x34d8
+// -0.345401
+0xb587
+// -0.031438
+0xa806
+// -0.001290
+0x9549
+// 0.339376
+0x356e
+// -0.136957
+0xb062
+// -0.218204
+0xb2fc
+// -0.531362
+0xb840
+// -0.225418
+0xb337
+// -0.490645
+0xb7da
+// 0.189584
+0x3211
+// 0.134361
+0x304d
+// -0.133553
+0xb046
+// 0.299372
+0x34ca
+// 0.053582
+0x2adc
+// -0.238793
+0xb3a4
+// -0.011138
+0xa1b4
+// -0.025163
+0xa671
+// -0.141425
+0xb087
+// 0.155496
+0x30fa
+// -0.843599
+0xbac0
+// 0.404869
+0x367a
+// 0.279935
+0x347b
+// -0.171104
+0xb17a
+// 0.798979
+0x3a64
+// 0.419563
+0x36b7
+// -0.046483
+0xa9f3
+// 0.164040
+0x3140
+// -0.343224
+0xb57e
+// 0.355330
+0x35af
+// 0.249571
+0x33fc
+// 0.448248
+0x372c
+// -0.112597
+0xaf35
+// 0.002446
+0x1902
+// 0.512631
+0x381a
+// 0.390085
+0x363e
+// 0.031414
+0x2805
+// 0.290983
+0x34a8
+// -0.473040
+0xb792
+// -0.234847
+0xb384
+// -0.064832
+0xac26
+// -0.174688
+0xb197
+// 0.362366
+0x35cc
+// 0.274910
+0x3466
+// 0.004258
+0x1c5c
+// 0.108512
+0x2ef2
+// -0.285746
+0xb492
+// 0.025406
+0x2681
+// -0.093021
+0xadf4
+// -0.363160
+0xb5d0
+// 0.528219
+0x383a
+// 0.358702
+0x35bd
+// 0.306481
+0x34e7
+// -0.179833
+0xb1c1
+// -0.032838
+0xa834
+// -0.046751
+0xa9fc
+// 1.000000
+0x3c00
+// 0.308833
+0x34f1
+// -0.181706
+0xb1d1
+// 0.090539
+0x2dcb
+// 0.313973
+0x3506
+// 0.034424
+0x2868
+// -0.251419
+0xb406
+// -0.322614
+0xb529
+// -0.305809
+0xb4e5
+// 0.129173
+0x3022
+// -0.694126
+0xb98e
+// -0.250858
+0xb404
+// 0.059264
+0x2b96
+// -0.128325
+0xb01b
+// -0.159713
+0xb11c
+// 0.413051
+0x369c
+// 0.387051
+0x3631
+// 0.182918
+0x31da
+// -0.385751
+0xb62c
+// -0.224448
+0xb32f
+// 0.038416
+0x28eb
+// -0.396644
+0xb659
+// 0.263277
+0x3436
+// 0.298354
+0x34c6
+// -0.118298
+0xaf92
+// 0.059839
+0x2ba9
+// 0.015236
+0x23cd
+// -0.385397
+0xb62b
+// -0.050137
+0xaa6b
+// 0.225182
+0x3335
+// 0.091417
+0x2dda
+// -0.332301
+0xb551
+// 0.051487
+0x2a97
+// -0.338015
+0xb569
+// -0.138657
+0xb070
+// -0.062845
+0xac06
+// 0.239372
+0x33a9
+// -0.412823
+0xb69b
+// -0.847904
+0xbac9
+// 0.204396
+0x328a
+// -0.319114
+0xb51b
+// -0.042128
+0xa964
+// 0.555310
+0x3871
+// 0.238792
+0x33a4
+// -0.252236
+0xb409
+// -0.017146
+0xa464
+// -0.023100
+0xa5ea
+// 0.196936
+0x324d
+// -0.113372
+0xaf41
+// 0.220568
+0x330f
+// 0.220745
+0x3310
+// 0.142593
+0x3090
+// 0.202789
+0x327d
+// -0.089607
+0xadbc
+// 0.043039
+0x2982
+// -0.022674
+0xa5ce
+// -0.266567
+0xb444
+// 0.017297
+0x246e
+// 0.046384
+0x29f0
+// 0.086227
+0x2d85
+// -0.238717
+0xb3a4
+// 0.118165
+0x2f90
+// -0.112214
+0xaf2f
+// 0.124228
+0x2ff3
+// -0.032142
+0xa81d
+// -0.469288
+0xb782
+// -0.083456
+0xad57
+// -0.114390
+0xaf52
+// -0.376088
+0xb604
+// -0.256146
+0xb419
+// 0.564330
+0x3884
+// -0.455378
+0xb749
+// 0.037945
+0x28db
+// -0.086562
+0xad8a
+// -0.020960
+0xa55e
+// -0.052032
+0xaaa9
+// -0.020560
+0xa543
+// 0.181430
+0x31ce
+// -0.168339
+0xb163
+// 0.160549
+0x3123
+// 0.337014
+0x3564
+// -0.011247
+0xa1c2
+// 0.535964
+0x384a
+// -0.272158
+0xb45b
+// 0.296389
+0x34be
+// -0.177760
+0xb1b0
+// -0.417893
+0xb6b0
+// 0.489028
+0x37d3
+// -0.074416
+0xacc3
+// -0.218618
+0xb2ff
+// -0.061131
+0xabd3
+// -0.285138
+0xb490
+// -0.323742
+0xb52e
+// 0.128847
+0x3020
+// 0.263543
+0x3437
+// 0.234468
+0x3381
+// 0.057744
+0x2b64
+// 0.575727
+0x389b
+// 0.167712
+0x315e
+// -0.088527
+0xadaa
+// -0.849734
+0xbacc
+// -0.200050
+0xb267
+// 0.324250
+0x3530
+// 0.161737
+0x312d
+// 0.153342
+0x30e8
+// 0.302206
+0x34d6
+// 0.116038
+0x2f6d
+// -0.099881
+0xae64
+// 0.186304
+0x31f6
+// -0.223249
+0xb325
+// 0.025895
+0x26a1
+// -0.298294
+0xb4c6
+// -0.318191
+0xb517
+// 0.098580
+0x2e4f
+// 0.073843
+0x2cba
+// -0.385885
+0xb62d
+// 0.126810
+0x300f
+// 0.259714
+0x3428
+// -0.073260
+0xacb0
+// 0.077473
+0x2cf5
+// -0.363027
+0xb5cf
+// -0.003902
+0x9bfe
+// -0.234876
+0xb384
+// 0.374649
+0x35ff
+// -0.454872
+0xb747
+// -0.472326
+0xb78f
+// 0.230260
+0x335e
+// -0.576768
+0xb89d
+// -0.010350
+0xa14d
+// -0.147272
+0xb0b6
+// -0.017430
+0xa476
+// 0.242852
+0x33c5
+// -0.381067
+0xb619
+// 0.063783
+0x2c15
+// -0.257065
+0xb41d
+// -0.306546
+0xb4e8
+// -0.061511
+0xabe0
+// -0.162114
+0xb130
+// -0.046601
+0xa9f7
+// -0.430016
+0xb6e1
+// 0.296375
+0x34be
+// -0.323816
+0xb52e
+// -0.117019
+0xaf7d
+// -0.294586
+0xb4b7
+// -0.557569
+0xb876
+// -0.087492
+0xad99
+// -0.076313
+0xace2
+// -0.036039
+0xa89d
+// -0.620596
+0xb8f7
+// 0.160799
+0x3125
+// -0.250968
+0xb404
+// -0.121415
+0xafc5
+// 0.004643
+0x1cc1
+// 0.555032
+0x3871
--- a/Testing/Source/Tests/FIRF16.cpp
+++ b/Testing/Source/Tests/FIRF16.cpp
@ -0,0 +1,148 @@
+#include "FIRF16.h"
+#include <stdio.h>
+#include "Error.h"
+
+#define SNR_THRESHOLD 60
+
+/* 
+
+Reference patterns are generated with
+a double precision computation.
+
+*/
+#define REL_ERROR (1.0e-2)
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+/* Scratch buffer receiving a zero padded copy of the coefficients of the
+   config being tested. It only exists when ARM_MATH_MVEF is defined and
+   ARM_MATH_AUTOVECTORIZE is not. */
+static __ALIGNED(8) float16_t coeffArray[32];
+#endif 
+
+/*
+
+Assert that the four float16 values starting at b are all exactly zero.
+
+*/
+void checkInnerTail(float16_t *b)
+{
+    for(int k = 0; k < 4; k++)
+    {
+        ASSERT_TRUE(b[k] == 0.0f);
+    }
+}
+
+    /*
+
+    Run arm_fir_f16 for every (blockSize, numTaps) pair found in the
+    config pattern and compare all the generated samples with the
+    reference pattern.
+
+    For each pair, the filter is initialized again and two consecutive
+    blocks of blockSize samples are filtered. The outputs of all the
+    configs are written one after the other in the output buffer.
+
+    */
+    void FIRF16::test_fir_f16()
+    {
+        const int16_t *configp = configs.ptr();
+        float16_t *statep = state.ptr();
+        const float16_t *orgcoefsp = coefs.ptr();
+
+        const float16_t *coefsp;
+        const float16_t *inputp = inputs.ptr();
+        float16_t *outp = output.ptr();
+
+        unsigned long i;
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+        int j;
+#endif
+        int blockSize;
+        int numTaps;
+
+        /*
+
+        Python script is generating different tests with
+        different blockSize and numTaps.
+
+        We loop on those configs. Each config uses two values of
+        the config pattern : blockSize then numTaps.
+
+        */
+        for(i=0; i < configs.nbSamples() ; i += 2)
+        {
+           blockSize = configp[0];
+           numTaps = configp[1];
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+           /* Copy coefficients and pad to zero.
+
+           memset is counting bytes and not elements : sizeof is used
+           so that all the float16_t entries of the array are cleared
+           and no coefficient from a previous config with more taps
+           is remaining after the numTaps copied values.
+           */
+           memset(coeffArray,0,sizeof(coeffArray));
+           for(j=0;j < numTaps; j++)
+           {
+              coeffArray[j] = orgcoefsp[j];
+           }
+
+           coefsp = coeffArray;
+#else
+           coefsp = orgcoefsp;
+#endif
+
+           /*
+
+           The filter is initialized with the coefs, blockSize and numTaps.
+
+           */
+           arm_fir_init_f16(&this->S,numTaps,coefsp,statep,blockSize);
+
+           /*
+
+           Input pointer is reset since the same input pattern is used
+
+           */
+           inputp = inputs.ptr();
+
+           /*
+
+           Python script is filtering a 2*blockSize number of samples.
+           We do the same filtering in two passes to check (indirectly)
+           that the state management of the fir is working.
+
+           After each pass, the four values following the samples
+           just written are checked to still be zero.
+
+           */
+
+           arm_fir_f16(&this->S,inputp,outp,blockSize);
+           outp += blockSize;
+           checkInnerTail(outp);
+
+           inputp += blockSize;
+           arm_fir_f16(&this->S,inputp,outp,blockSize);
+           outp += blockSize;
+           checkInnerTail(outp);
+
+           /* Next config and the coefficients of the next filter */
+           configp += 2;
+           orgcoefsp += numTaps;
+
+        }
+
+
+        ASSERT_EMPTY_TAIL(output);
+
+        ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
+
+        ASSERT_REL_ERROR(output,ref,REL_ERROR);
+
+    }
+
+ 
+    /*
+
+    Reload the input, coefficient, config and reference patterns
+    and create the output and state buffers used by the test.
+
+    params is not used.
+
+    */
+    void FIRF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
+    {
+       /* Max blockSize + numTaps - 1 as generated by Python script */
+       const int maxStateLength = 47;
+
+       (void)params;
+
+       /* Nothing specific to do for a given test ID */
+       switch(id)
+       {
+        case FIRF16::TEST_FIR_F16_1:
+        default:
+        break;
+       }
+
+       inputs.reload(FIRF16::FIRINPUTS_F16_ID,mgr);
+       coefs.reload(FIRF16::FIRCOEFS_F16_ID,mgr);
+       configs.reload(FIRF16::FIRCONFIGS_S16_ID,mgr);
+       ref.reload(FIRF16::FIRREFS_F16_ID,mgr);
+
+       /* As many output samples as there are reference samples */
+       output.create(ref.nbSamples(),FIRF16::OUT_F16_ID,mgr);
+       state.create(maxStateLength,FIRF16::OUT_F16_ID,mgr);
+    }
+
+    /*
+
+    Dump the output buffer using the pattern manager mgr.
+
+    id is not used.
+
+    */
+    void FIRF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
+    {
+        (void)id;
+        output.dump(mgr);
+    }
--- a/Testing/desc_f16.txt
+++ b/Testing/desc_f16.txt
@ -140,6 +140,34 @@ group Root {

        }

+        group Filtering Tests {
+           class = FilteringTests
+           folder = Filtering
+
+           group FIR {
+             class = FIR 
+             folder = FIR
+
+             suite FIR F16 {
+                class = FIRF16 
+                folder = FIRF16
+
+                Pattern FIRINPUTS_F16_ID : FirInput1_f16.txt
+                Pattern FIRCOEFS_F16_ID : FirCoefs1_f16.txt
+                Pattern FIRCONFIGS_S16_ID : FirConfigs1_s16.txt 
+
+                Pattern FIRREFS_F16_ID : FirRefs1_f16.txt
+
+                Output  OUT_F16_ID : Output
+
+                Functions {
+                  arm_fir_f16:test_fir_f16
+                }
+
+             }
+           }
+        }
+
        group Transform Tests {
           class = TransformTests
           folder = Transform 
--- a/Toolchain/GCC.cmake
+++ b/Toolchain/GCC.cmake
@ -15,7 +15,7 @@ function(compilerSpecificCompileOptions PROJECTNAME ROOT)
  # Add support for the type __fp16 even if there is no HW
  # support for it.
  if (FLOAT16)
-  target_compile_options(${PROJECTNAME} PUBLIC "-mfp16-format=alternative")
+  target_compile_options(${PROJECTNAME} PUBLIC "-mfp16-format=ieee")
  endif()

  if ((OPTIMIZED) AND (NOT DISABLEOPTIM))