CMSIS-DSP: Improvements to the float16 support for building.

6 years ago · c33adc9485
parent 1464ad53f7
commit c33adc9485
62 changed files with 15091 additions and 14444 deletions
--- a/Include/arm_common_tables.h
+++ b/Include/arm_common_tables.h
@ -31,6 +31,11 @@

 #include "arm_math.h"

+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
 #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
  /* Double Precision Float CFFT twiddles */
  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
@ -110,44 +115,6 @@
    #define twiddleCoef twiddleCoef_4096
  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */

-  /* F16 */
-  #if !defined(__CC_ARM)
-  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_16)
-    extern const float16_t twiddleCoefF16_16[32];
-  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_32)
-    extern const float16_t twiddleCoefF16_32[64];
-  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_64)
-    extern const float16_t twiddleCoefF16_64[128];
-  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_128)
-    extern const float16_t twiddleCoefF16_128[256];
-  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_256)
-    extern const float16_t twiddleCoefF16_256[512];
-  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_512)
-    extern const float16_t twiddleCoefF16_512[1024];
-  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_1024)
-    extern const float16_t twiddleCoefF16_1024[2048];
-  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_2048)
-    extern const float16_t twiddleCoefF16_2048[4096];
-  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-
-  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_4096)
-    extern const float16_t twiddleCoefF16_4096[8192];
-  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
-  #endif /* ARMAC5 */
  /* Q31 */

  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_16)
@ -553,5 +520,9 @@
 extern const unsigned char hwLUT[256];
 #endif /* (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) */

+#ifdef   __cplusplus
+}
+#endif
+
 #endif /*  ARM_COMMON_TABLES_H */

--- a/Include/arm_common_tables_f16.h
+++ b/Include/arm_common_tables_f16.h
@ -0,0 +1,90 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_common_tables_f16.h
+ * Description:  Extern declarations for the half-precision (f16) FFT twiddle tables
+ *
+ * $Date:        27. January 2017
+ * $Revision:    V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _ARM_COMMON_TABLES_F16_H
+#define _ARM_COMMON_TABLES_F16_H
+
+#include "arm_math_f16.h"
+#include "arm_common_tables.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
+
+  /* F16 */
+  #if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_16)
+    extern const float16_t twiddleCoefF16_16[32];
+  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_32)
+    extern const float16_t twiddleCoefF16_32[64];
+  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_64)
+    extern const float16_t twiddleCoefF16_64[128];
+  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_128)
+    extern const float16_t twiddleCoefF16_128[256];
+  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_256)
+    extern const float16_t twiddleCoefF16_256[512];
+  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_512)
+    extern const float16_t twiddleCoefF16_512[1024];
+  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_1024)
+    extern const float16_t twiddleCoefF16_1024[2048];
+  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_2048)
+    extern const float16_t twiddleCoefF16_2048[4096];
+  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_4096)
+    extern const float16_t twiddleCoefF16_4096[8192];
+  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
+  #endif /* !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED) */
+    
+#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*  _ARM_COMMON_TABLES_F16_H */
+
+  
--- a/Include/arm_const_structs.h
+++ b/Include/arm_const_structs.h
@ -33,6 +33,10 @@
 #include "arm_math.h"
 #include "arm_common_tables.h"

+#ifdef   __cplusplus
+extern "C"
+{
+#endif
   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len16;
   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len32;
   extern const arm_cfft_instance_f64 arm_cfft_sR_f64_len64;
@ -53,18 +57,6 @@
   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len2048;
   extern const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096;

-   #if !defined(__CC_ARM)
-   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len16;
-   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len32;
-   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len64;
-   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len128;
-   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len256;
-   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len512;
-   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len1024;
-   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len2048;
-   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len4096;
-   #endif
-
   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len16;
   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len32;
   extern const arm_cfft_instance_q31 arm_cfft_sR_q31_len64;
@ -85,4 +77,9 @@
   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len2048;
   extern const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096;

+#ifdef   __cplusplus
+}
 #endif
+
+#endif
+
--- a/Include/arm_const_structs_f16.h
+++ b/Include/arm_const_structs_f16.h
@ -0,0 +1,57 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_const_structs_f16.h
+ * Description:  Constant structs that are initialized for user convenience.
+ *               For example, some can be given as arguments to the arm_cfft_f16() function.
+ *
+ * $Date:        20. April 2020
+ * $Revision:    V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _ARM_CONST_STRUCTS_F16_H
+#define _ARM_CONST_STRUCTS_F16_H
+
+#include "arm_math_f16.h"
+#include "arm_common_tables_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED) /* constant f16 CFFT instances, one per FFT length 16..4096 */
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len16;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len32;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len64;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len128;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len256;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len512;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len1024;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len2048;
+   extern const arm_cfft_instance_f16 arm_cfft_sR_f16_len4096;
+#endif /* !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED) */
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif
--- a/Include/arm_helium_utils.h
+++ b/Include/arm_helium_utils.h
@ -29,6 +29,11 @@
 #ifndef _ARM_UTILS_HELIUM_H_
 #define _ARM_UTILS_HELIUM_H_

+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
 /***************************************

 Definitions available for MVEF and MVEI
@ -360,4 +365,8 @@ __STATIC_INLINE q15x8_t FAST_VSQRT_Q15(q15x8_t vecIn)

 #endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI) */

+#ifdef   __cplusplus
+}
 #endif
+
+#endif
--- a/Include/arm_math.h
+++ b/Include/arm_math.h
@ -396,6 +396,11 @@ extern "C"

 #if defined(ARM_MATH_NEON)
 #include <arm_neon.h>
+#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+  #if !defined(ARM_MATH_NEON_FLOAT16)
+  #define ARM_MATH_NEON_FLOAT16
+  #endif
+#endif
 #endif

 #if !defined(ARM_MATH_AUTOVECTORIZE)
@ -410,13 +415,14 @@ extern "C"
  #if !defined(ARM_MATH_MVEF)
    #define ARM_MATH_MVEF
  #endif
-  #if !defined(ARM_MATH_FLOAT16)
-    #define ARM_MATH_FLOAT16
+  #if !defined(ARM_MATH_MVE_FLOAT16)
+    #define ARM_MATH_MVE_FLOAT16
  #endif
 #endif
-  
+
 #endif /*!defined(ARM_MATH_AUTOVECTORIZE)*/

+
 #if defined (ARM_MATH_HELIUM)
  #if !defined(ARM_MATH_MVEF)
    #define ARM_MATH_MVEF
@ -426,15 +432,23 @@ extern "C"
    #define ARM_MATH_MVEI
  #endif

-  #if !defined(ARM_MATH_FLOAT16)
-    #define ARM_MATH_FLOAT16
+  #if !defined(ARM_MATH_MVE_FLOAT16)
+    #define ARM_MATH_MVE_FLOAT16
  #endif
 #endif

+#ifdef   __cplusplus
+} /* leave the extern "C" block so that <arm_mve.h> is included outside of it */
+#endif
+
 #if __ARM_FEATURE_MVE
 #include <arm_mve.h>
 #endif

+#ifdef   __cplusplus
+extern "C"
+{ /* re-enter extern "C"; NOTE(review): presumably arm_mve.h must not get C linkage in C++ builds - confirm */
+#endif
 /**
   * @brief 8-bit fractional data type in 1.7 format.
   */
@ -455,17 +469,6 @@ extern "C"
   */
  typedef int64_t q63_t;

-  /**
-   * @brief 16-bit floating-point type definition.
-   * This is already defined in arm_mve.h
-   *
-   * This is not fully supported on ARM AC5.
-   */
-
-#if !defined( __CC_ARM ) && !(__ARM_FEATURE_MVE & 2)
-  typedef __fp16 float16_t;
-#endif
-
  /**
   * @brief 32-bit floating-point type definition.
   */
@ -569,13 +572,6 @@ extern "C"
   */
  typedef float32x4_t f32x4_t;

-#if defined(ARM_MATH_FLOAT16)
-  /**
-   * @brief 16-bit floating-point 128-bit vector data type
-   */
-  typedef __ALIGNED(2) float16x8_t f16x8_t;
-#endif
-
  /**
   * @brief 32-bit floating-point 128-bit vector pair data type
   */
@ -586,18 +582,6 @@ extern "C"
   */
  typedef float32x4x4_t f32x4x4_t;

-#if defined(ARM_MATH_FLOAT16)
-  /**
-   * @brief 16-bit floating-point 128-bit vector pair data type
-   */
-  typedef float16x8x2_t f16x8x2_t;
-
-  /**
-   * @brief 16-bit floating-point 128-bit vector quadruplet data type
-   */
-  typedef float16x8x4_t f16x8x4_t;
-#endif
-
  /**
   * @brief 32-bit ubiquitous 128-bit vector data type
   */
@ -607,17 +591,6 @@ extern "C"
      int32x4_t       i;
  } any32x4_t;

-#if defined(ARM_MATH_FLOAT16)
-  /**
-   * @brief 16-bit ubiquitous 128-bit vector data type
-   */
-  typedef union _any16x8_t
-  {
-      float16x8_t     f;
-      int16x8_t       i;
-  } any16x8_t;
-#endif
-
 #endif

 #if defined(ARM_MATH_NEON)
@ -641,24 +614,11 @@ extern "C"
   */
  typedef float32x2_t  f32x2_t;

-#if defined(ARM_MATH_FLOAT16)
-  /**
-   * @brief 16-bit float 64-bit vector data type.
-   */
-  typedef  __ALIGNED(2) float16x4_t f16x4_t;
-#endif 
-
  /**
   * @brief 32-bit floating-point 128-bit vector triplet data type
   */
  typedef float32x4x3_t f32x4x3_t;

-#if defined(ARM_MATH_FLOAT16)
-  /**
-   * @brief 16-bit floating-point 128-bit vector triplet data type
-   */
-  typedef float16x8x3_t f16x8x3_t;
-#endif

  /**
   * @brief 32-bit fractional 128-bit vector triplet data type in 1.31 format
@ -690,22 +650,6 @@ extern "C"
   */
  typedef float32x2x4_t f32x2x4_t;

-#if defined(ARM_MATH_FLOAT16)
-  /**
-   * @brief 16-bit floating-point 64-bit vector pair data type
-   */
-  typedef float16x4x2_t f16x4x2_t;
-
-  /**
-   * @brief 16-bit floating-point 64-bit vector triplet data type
-   */
-  typedef float16x4x3_t f16x4x3_t;
-
-  /**
-   * @brief 16-bit floating-point 64-bit vector quadruplet data type
-   */
-  typedef float16x4x4_t f16x4x4_t;
-#endif 

  /**
   * @brief 32-bit fractional 64-bit vector pair data type in 1.31 format
@ -761,16 +705,6 @@ extern "C"
      int32x2_t       i;
  } any32x2_t;

-#if defined(ARM_MATH_FLOAT16)
-  /**
-   * @brief 16-bit ubiquitous 64-bit vector data type
-   */
-  typedef union _any16x4_t
-  {
-      float16x4_t     f;
-      int16x4_t       i;
-  } any16x4_t;
-#endif 

  /**
   * @brief 32-bit status 64-bit vector data type.
@ -796,30 +730,21 @@ extern "C"
 #define F64_MAX   ((float64_t)DBL_MAX)
 #define F32_MAX   ((float32_t)FLT_MAX)

-#if !defined( __CC_ARM )
-#define F16_MAX   ((float16_t)FLT_MAX)
-#endif
+

 #define F64_MIN   (-DBL_MAX)
 #define F32_MIN   (-FLT_MAX)

-#if !defined( __CC_ARM )
-#define F16_MIN   (-(float16_t)FLT_MAX)
-#endif
+

 #define F64_ABSMAX   ((float64_t)DBL_MAX)
 #define F32_ABSMAX   ((float32_t)FLT_MAX)

-#if !defined( __CC_ARM )
-#define F16_ABSMAX   ((float16_t)FLT_MAX)
-#endif
+

 #define F64_ABSMIN   ((float64_t)0.0)
 #define F32_ABSMIN   ((float32_t)0.0)

-#if !defined( __CC_ARM )
-#define F16_ABSMIN   ((float16_t)0.0)
-#endif

 #define Q31_MAX   ((q31_t)(0x7FFFFFFFL))
 #define Q15_MAX   ((q15_t)(0x7FFF))
@ -2974,20 +2899,6 @@ void arm_mat_init_f32(
        float32_t * pDst,
        uint32_t blockSize);

-#if !defined( __CC_ARM )
-  /**
-   * @brief Floating-point vector multiplication.
-   * @param[in]  pSrcA      points to the first input vector
-   * @param[in]  pSrcB      points to the second input vector
-   * @param[out] pDst       points to the output vector
-   * @param[in]  blockSize  number of samples in each vector
-   */
-  void arm_mult_f16(
-  const float16_t * pSrcA,
-  const float16_t * pSrcB,
-        float16_t * pDst,
-        uint32_t blockSize);
-#endif

  /**
   * @brief Instance structure for the Q15 CFFT/CIFFT function.
@ -3109,23 +3020,6 @@ void arm_mat_init_f32(
          float32_t onebyfftLen;             /**< value of 1/fftLen. */
  } arm_cfft_radix2_instance_f32;

-  /**
-   * @brief Instance structure for the floating-point CFFT/CIFFT function.
-   */
-
-#if !defined( __CC_ARM )
-  typedef struct
-  {
-          uint16_t fftLen;                   /**< length of the FFT. */
-          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    const float16_t *pTwiddle;               /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
-          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-          float16_t onebyfftLen;             /**< value of 1/fftLen. */
-  } arm_cfft_radix2_instance_f16;
-#endif

 /* Deprecated */
  arm_status arm_cfft_radix2_init_f32(
@ -3154,22 +3048,7 @@ void arm_mat_init_f32(
          float32_t onebyfftLen;             /**< value of 1/fftLen. */
  } arm_cfft_radix4_instance_f32;

-  /**
-   * @brief Instance structure for the floating-point CFFT/CIFFT function.
-   */
-#if !defined( __CC_ARM )
-  typedef struct
-  {
-          uint16_t fftLen;                   /**< length of the FFT. */
-          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    const float16_t *pTwiddle;               /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
-          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-          float16_t onebyfftLen;             /**< value of 1/fftLen. */
-  } arm_cfft_radix4_instance_f16;
-#endif
+

 /* Deprecated */
  arm_status arm_cfft_radix4_init_f32(
@ -3260,26 +3139,7 @@ void arm_cfft_q31(
 #endif
  } arm_cfft_instance_f32;

-  /**
-   * @brief Instance structure for the floating-point CFFT/CIFFT function.
-   */
-#if !defined( __CC_ARM )
-  typedef struct
-  {
-          uint16_t fftLen;                   /**< length of the FFT. */
-    const float16_t *pTwiddle;         /**< points to the Twiddle factor table. */
-    const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
-          uint16_t bitRevLength;             /**< bit reversal table length. */
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-   const uint32_t *rearranged_twiddle_tab_stride1_arr;        /**< Per stage reordered twiddle pointer (offset 1) */                                                       \
-   const uint32_t *rearranged_twiddle_tab_stride2_arr;        /**< Per stage reordered twiddle pointer (offset 2) */                                                       \
-   const uint32_t *rearranged_twiddle_tab_stride3_arr;        /**< Per stage reordered twiddle pointer (offset 3) */                                                       \
-   const float16_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */                                                                   \
-   const float16_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */                                                                   \
-   const float16_t *rearranged_twiddle_stride3;
-#endif
-  } arm_cfft_instance_f16;
-#endif
+

  arm_status arm_cfft_init_f32(
  arm_cfft_instance_f32 * S,
@ -3291,17 +3151,6 @@ void arm_cfft_q31(
        uint8_t ifftFlag,
        uint8_t bitReverseFlag);

-#if !defined( __CC_ARM )
-  arm_status arm_cfft_init_f16(
-  arm_cfft_instance_f16 * S,
-  uint16_t fftLen);
-
-  void arm_cfft_f16(
-  const arm_cfft_instance_f16 * S,
-        float16_t * p1,
-        uint8_t ifftFlag,
-        uint8_t bitReverseFlag);
-#endif

  /**
   * @brief Instance structure for the Double Precision Floating-point CFFT/CIFFT function.
@ -3601,20 +3450,7 @@ arm_status arm_rfft_fast_init_f32 (
        float32_t * pDst,
        uint32_t blockSize);

-#if !defined( __CC_ARM )
-  /**
-   * @brief Floating-point vector addition.
-   * @param[in]  pSrcA      points to the first input vector
-   * @param[in]  pSrcB      points to the second input vector
-   * @param[out] pDst       points to the output vector
-   * @param[in]  blockSize  number of samples in each vector
-   */
-  void arm_add_f16(
-  const float16_t * pSrcA,
-  const float16_t * pSrcB,
-        float16_t * pDst,
-        uint32_t blockSize);
-#endif
+

  /**
   * @brief Q7 vector addition.
@ -3671,20 +3507,7 @@ arm_status arm_rfft_fast_init_f32 (
        float32_t * pDst,
        uint32_t blockSize);

-#if !defined( __CC_ARM )
-  /**
-   * @brief Floating-point vector subtraction.
-   * @param[in]  pSrcA      points to the first input vector
-   * @param[in]  pSrcB      points to the second input vector
-   * @param[out] pDst       points to the output vector
-   * @param[in]  blockSize  number of samples in each vector
-   */
-  void arm_sub_f16(
-  const float16_t * pSrcA,
-  const float16_t * pSrcB,
-        float16_t * pDst,
-        uint32_t blockSize);
-#endif
+

  /**
   * @brief Q7 vector subtraction.
@ -3741,20 +3564,7 @@ arm_status arm_rfft_fast_init_f32 (
        float32_t * pDst,
        uint32_t blockSize);

-#if !defined( __CC_ARM )
-    /**
-   * @brief Multiplies a floating-point vector by a scalar.
-   * @param[in]  pSrc       points to the input vector
-   * @param[in]  scale      scale factor to be applied
-   * @param[out] pDst       points to the output vector
-   * @param[in]  blockSize  number of samples in the vector
-   */
-  void arm_scale_f16(
-  const float16_t * pSrc,
-        float16_t scale,
-        float16_t * pDst,
-        uint32_t blockSize);
-#endif
+

  /**
   * @brief Multiplies a Q7 vector by a scalar.
@ -3827,18 +3637,7 @@ arm_status arm_rfft_fast_init_f32 (
        float32_t * pDst,
        uint32_t blockSize);

-#if !defined( __CC_ARM )
-    /**
-   * @brief Floating-point vector absolute value.
-   * @param[in]  pSrc       points to the input buffer
-   * @param[out] pDst       points to the output buffer
-   * @param[in]  blockSize  number of samples in each vector
-   */
-  void arm_abs_f16(
-  const float16_t * pSrc,
-        float16_t * pDst,
-        uint32_t blockSize);
-#endif
+


  /**
@ -3878,20 +3677,7 @@ arm_status arm_rfft_fast_init_f32 (
        uint32_t blockSize,
        float32_t * result);

-#if !defined( __CC_ARM )
-  /**
-   * @brief Dot product of floating-point vectors.
-   * @param[in]  pSrcA      points to the first input vector
-   * @param[in]  pSrcB      points to the second input vector
-   * @param[in]  blockSize  number of samples in each vector
-   * @param[out] result     output result returned here
-   */
-  void arm_dot_prod_f16(
-  const float16_t * pSrcA,
-  const float16_t * pSrcB,
-        uint32_t blockSize,
-        float16_t * result);
-#endif
+

  /**
   * @brief Dot product of Q7 vectors.
@ -3990,20 +3776,7 @@ arm_status arm_rfft_fast_init_f32 (
        float32_t * pDst,
        uint32_t blockSize);

-#if !defined( __CC_ARM )
-  /**
-   * @brief  Adds a constant offset to a floating-point vector.
-   * @param[in]  pSrc       points to the input vector
-   * @param[in]  offset     is the offset to be added
-   * @param[out] pDst       points to the output vector
-   * @param[in]  blockSize  number of samples in the vector
-   */
-  void arm_offset_f16(
-  const float16_t * pSrc,
-        float16_t offset,
-        float16_t * pDst,
-        uint32_t blockSize);
-#endif
+

  /**
   * @brief  Adds a constant offset to a Q7 vector.
@ -4058,18 +3831,7 @@ arm_status arm_rfft_fast_init_f32 (
        float32_t * pDst,
        uint32_t blockSize);

-#if !defined( __CC_ARM )
-  /**
-   * @brief  Negates the elements of a floating-point vector.
-   * @param[in]  pSrc       points to the input vector
-   * @param[out] pDst       points to the output vector
-   * @param[in]  blockSize  number of samples in the vector
-   */
-  void arm_negate_f16(
-  const float16_t * pSrc,
-        float16_t * pDst,
-        uint32_t blockSize);
-#endif
+
  /**
   * @brief  Negates the elements of a Q7 vector.
   * @param[in]  pSrc       points to the input vector
--- a/Include/arm_math_f16.h
+++ b/Include/arm_math_f16.h
@ -0,0 +1,310 @@
+/******************************************************************************
+ * @file     arm_math_f16.h
+ * @brief    Public header file for f16 function of the CMSIS DSP Library
+ * @version  V1.8.1
+ * @date     20. April 2020
+ ******************************************************************************/
+/*
+ * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _ARM_MATH_F16_H
+#define _ARM_MATH_F16_H
+
+#include "arm_math.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+#if !defined( __CC_ARM )
+
+/**
+ * @brief 16-bit floating-point type definition.
+ * This is already defined in arm_mve.h
+ *
+ * This is not fully supported on ARM AC5.
+ */
+
+/*
+ * Decide whether the f16 kernels can be built (ARM_FLOAT16_SUPPORTED).
+ * When neither bit 1 of __ARM_FEATURE_MVE nor __ARM_NEON is set, float16_t
+ * is typedef'd to __fp16 here, but only if the compiler defines one of the
+ * __ARM_FP16_FORMAT_* macros. In the #else branch support is assumed and no
+ * typedef is made. NOTE(review): that branch does not test the FP16 format.
+ */
+#if !(__ARM_FEATURE_MVE & 2) && !(__ARM_NEON)
+  #if defined(__ARM_FP16_FORMAT_IEEE) || defined(__ARM_FP16_FORMAT_ALTERNATIVE)
+  typedef __fp16 float16_t;
+  #define ARM_FLOAT16_SUPPORTED
+  #endif
+#else
+  #define ARM_FLOAT16_SUPPORTED
+#endif
+
+#if defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF) /* floating point vector*/
+  
+#if defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_NEON_FLOAT16)
+  /**
+   * @brief 16-bit floating-point 128-bit vector data type
+   */
+  typedef __ALIGNED(2) float16x8_t f16x8_t;
+
+  /**
+   * @brief 16-bit floating-point 128-bit vector pair data type
+   */
+  typedef float16x8x2_t f16x8x2_t;
+
+  /**
+   * @brief 16-bit floating-point 128-bit vector quadruplet data type
+   */
+  typedef float16x8x4_t f16x8x4_t;
+
+  /**
+   * @brief 16-bit ubiquitous 128-bit vector data type
+   */
+  typedef union _any16x8_t
+  {
+      float16x8_t     f;
+      int16x8_t       i;
+  } any16x8_t;
+#endif
+
+#endif
+
+#if defined(ARM_MATH_NEON)
+ 
+
+#if defined(ARM_MATH_NEON_FLOAT16)
+  /**
+   * @brief 16-bit float 64-bit vector data type.
+   */
+  typedef  __ALIGNED(2) float16x4_t f16x4_t;
+
+  /**
+   * @brief 16-bit floating-point 128-bit vector triplet data type
+   */
+  typedef float16x8x3_t f16x8x3_t;
+
+  /**
+   * @brief 16-bit floating-point 64-bit vector pair data type
+   */
+  typedef float16x4x2_t f16x4x2_t;
+
+  /**
+   * @brief 16-bit floating-point 64-bit vector triplet data type
+   */
+  typedef float16x4x3_t f16x4x3_t;
+
+  /**
+   * @brief 16-bit floating-point 64-bit vector quadruplet data type
+   */
+  typedef float16x4x4_t f16x4x4_t;
+
+  /**
+   * @brief 16-bit ubiquitous 64-bit vector data type
+   */
+  typedef union _any16x4_t
+  {
+      float16x4_t     f;
+      int16x4_t       i;
+  } any16x4_t;
+#endif 
+
+#endif
+
+
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+#define F16_MAX   ((float16_t)65504.0f)    /* largest finite IEEE 754 binary16 value; FLT_MAX does not fit in 16 bits */
+#define F16_MIN   (-(float16_t)65504.0f)   /* most negative finite binary16 value */
+
+#define F16_ABSMAX   ((float16_t)65504.0f) /* largest finite magnitude, same value as F16_MAX */
+#define F16_ABSMIN   ((float16_t)0.0)      /* smallest magnitude */
+
+  /**
+   * @brief Half-precision (float16_t) floating-point vector addition.
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
+  void arm_add_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief Half-precision (float16_t) floating-point vector subtraction.
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
+  void arm_sub_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief Multiplies a half-precision (float16_t) floating-point vector by a scalar.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  scale      scale factor to be applied
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
+  void arm_scale_f16(
+  const float16_t * pSrc,
+        float16_t scale,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief Half-precision (float16_t) floating-point vector absolute value.
+   * @param[in]  pSrc       points to the input buffer
+   * @param[out] pDst       points to the output buffer
+   * @param[in]  blockSize  number of samples in each vector
+   */
+  void arm_abs_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Adds a constant offset to a half-precision (float16_t) floating-point vector.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  offset     is the offset to be added
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
+  void arm_offset_f16(
+  const float16_t * pSrc,
+        float16_t offset,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief Dot product of half-precision (float16_t) floating-point vectors.
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[in]  blockSize  number of samples in each vector
+   * @param[out] result     output result returned here (also a float16_t)
+   */
+  void arm_dot_prod_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t blockSize,
+        float16_t * result);
+
+  /**
+   * @brief Half-precision (float16_t) floating-point vector multiplication.
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
+  void arm_mult_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief  Negates the elements of a float16_t floating-point vector.
+   * @param[in]  pSrc       points to the input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
+  void arm_negate_f16(
+  const float16_t * pSrc,
+        float16_t * pDst,
+        uint32_t blockSize);
+
+  /**
+   * @brief Instance structure for the radix-2 float16_t floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float16_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float16_t onebyfftLen;             /**< value of 1/fftLen. */
+  } arm_cfft_radix2_instance_f16;
+
+  /**
+   * @brief Instance structure for the radix-4 float16_t floating-point CFFT/CIFFT function.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float16_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float16_t onebyfftLen;             /**< value of 1/fftLen. */
+  } arm_cfft_radix4_instance_f16;
+
+  /**
+   * @brief Instance structure for the float16_t floating-point CFFT/CIFFT function.
+   *
+   * The rearranged_twiddle_* members exist only when ARM_MATH_MVEF is defined and
+   * ARM_MATH_AUTOVECTORIZE is not, so the structure layout depends on those build flags.
+   */
+  typedef struct
+  {
+          uint16_t fftLen;                   /**< length of the FFT. */
+    const float16_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+   const uint32_t *rearranged_twiddle_tab_stride1_arr;        /**< Per stage reordered twiddle pointer (offset 1) */
+   const uint32_t *rearranged_twiddle_tab_stride2_arr;        /**< Per stage reordered twiddle pointer (offset 2) */
+   const uint32_t *rearranged_twiddle_tab_stride3_arr;        /**< Per stage reordered twiddle pointer (offset 3) */
+   const float16_t *rearranged_twiddle_stride1;               /**< reordered twiddle offset 1 storage */
+   const float16_t *rearranged_twiddle_stride2;               /**< reordered twiddle offset 2 storage */
+   const float16_t *rearranged_twiddle_stride3;               /**< reordered twiddle offset 3 storage */
+#endif
+  } arm_cfft_instance_f16;
+
+
+  /**
+   * @brief  Sets up a float16_t CFFT instance for a given FFT length.
+   * @param[in,out] S       points to the arm_cfft_instance_f16 to set up
+   * @param[in]     fftLen  length of the FFT
+   * @return        an arm_status code
+   * @note   NOTE(review): presumably fills in the table pointers of S for the requested length;
+   *         the supported lengths and the status returned on failure are not visible from this
+   *         header - confirm against arm_cfft_init_f16.c.
+   */
+  arm_status arm_cfft_init_f16(
+  arm_cfft_instance_f16 * S,
+  uint16_t fftLen);
+
+  /**
+   * @brief  float16_t CFFT/CIFFT function operating on an arm_cfft_instance_f16.
+   * @param[in]     S               points to the instance structure (not modified)
+   * @param[in,out] p1              points to the float16_t data buffer
+   *                                (NOTE(review): presumably complex data transformed in place - confirm)
+   * @param[in]     ifftFlag        NOTE(review): presumably 0 = forward, 1 = inverse transform, as for the
+   *                                like-named member of the radix-2/radix-4 instance structures - confirm
+   * @param[in]     bitReverseFlag  NOTE(review): presumably 1 enables and 0 disables bit reversal of the
+   *                                output, as for the like-named radix-2/radix-4 instance member - confirm
+   */
+  void arm_cfft_f16(
+  const arm_cfft_instance_f16 * S,
+        float16_t * p1,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
+  
+#endif /* ARM_FLOAT16_SUPPORTED*/
+#endif /* !defined( __CC_ARM ) */
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /* _ARM_MATH_F16_H */
+
+
--- a/Include/arm_mve_tables.h
+++ b/Include/arm_mve_tables.h
@ -31,7 +31,10 @@

 #include "arm_math.h"

- 
+#ifdef   __cplusplus
+extern "C"
+{
+#endif


 
@ -97,67 +100,6 @@ extern float32_t rearranged_twiddle_stride3_4096_f32[2728];



-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_16) || defined(ARM_TABLE_TWIDDLECOEF_F16_32)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_16_f16[2];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_16_f16[2];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_16_f16[2];
-extern float16_t rearranged_twiddle_stride1_16_f16[8];
-extern float16_t rearranged_twiddle_stride2_16_f16[8];
-extern float16_t rearranged_twiddle_stride3_16_f16[8];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_64) || defined(ARM_TABLE_TWIDDLECOEF_F16_128)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_64_f16[3];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_64_f16[3];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_64_f16[3];
-extern float16_t rearranged_twiddle_stride1_64_f16[40];
-extern float16_t rearranged_twiddle_stride2_64_f16[40];
-extern float16_t rearranged_twiddle_stride3_64_f16[40];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_256) || defined(ARM_TABLE_TWIDDLECOEF_F16_512)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_256_f16[4];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_256_f16[4];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_256_f16[4];
-extern float16_t rearranged_twiddle_stride1_256_f16[168];
-extern float16_t rearranged_twiddle_stride2_256_f16[168];
-extern float16_t rearranged_twiddle_stride3_256_f16[168];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_1024) || defined(ARM_TABLE_TWIDDLECOEF_F16_2048)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_f16[5];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_f16[5];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_f16[5];
-extern float16_t rearranged_twiddle_stride1_1024_f16[680];
-extern float16_t rearranged_twiddle_stride2_1024_f16[680];
-extern float16_t rearranged_twiddle_stride3_1024_f16[680];
-#endif
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_4096) || defined(ARM_TABLE_TWIDDLECOEF_F16_8192)
-
-extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_f16[6];
-extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_f16[6];
-extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_f16[6];
-extern float16_t rearranged_twiddle_stride1_4096_f16[2728];
-extern float16_t rearranged_twiddle_stride2_4096_f16[2728];
-extern float16_t rearranged_twiddle_stride3_4096_f16[2728];
-#endif
-
-
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
-
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-
-
 #if defined(ARM_MATH_MVEI) 

 #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
@ -280,16 +222,9 @@ extern q15_t rearranged_twiddle_stride3_4096_q15[2728];



-#if defined(ARM_MATH_MVEI) 
-
-#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
-
-
-#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
-
-#endif /* defined(ARM_MATH_MVEI) */
-
-
+#ifdef   __cplusplus
+}
+#endif

 #endif /*_ARM_MVE_TABLES_H*/

--- a/Include/arm_mve_tables_f16.h
+++ b/Include/arm_mve_tables_f16.h
@ -0,0 +1,108 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mve_tables_f16.h
+ * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
+ *               used for MVE implementation only
+ *
+ * $Date:        14. April 2020
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ #ifndef _ARM_MVE_TABLES_F16_H
+ #define _ARM_MVE_TABLES_F16_H
+
+ #include "arm_math_f16.h"
+
+#ifdef   __cplusplus
+extern "C"
+{
+#endif
+
+
+ 
+
+/*
+ * Reordered twiddle tables for the float16_t MVE CFFT, one group per table size
+ * (16, 64, 256, 1024, 4096). Each group is declared when either of the two
+ * ARM_TABLE_TWIDDLECOEF_F16_* macros named in its #if is selected (or when all
+ * tables are enabled).
+ * NOTE(review): the uint32_t *_tab_strideN_arr_* and float16_t *_strideN_* arrays
+ * presumably back the like-named rearranged_twiddle_* members of
+ * arm_cfft_instance_f16 - confirm against arm_cfft_init_f16.c.
+ * NOTE(review): this header is written by Scripts/genMVETwiddleCoefs.py (-he16
+ * output); regenerate it rather than editing the declarations by hand.
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_16) || defined(ARM_TABLE_TWIDDLECOEF_F16_32)
+
+extern uint32_t rearranged_twiddle_tab_stride1_arr_16_f16[2];
+extern uint32_t rearranged_twiddle_tab_stride2_arr_16_f16[2];
+extern uint32_t rearranged_twiddle_tab_stride3_arr_16_f16[2];
+extern float16_t rearranged_twiddle_stride1_16_f16[8];
+extern float16_t rearranged_twiddle_stride2_16_f16[8];
+extern float16_t rearranged_twiddle_stride3_16_f16[8];
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_64) || defined(ARM_TABLE_TWIDDLECOEF_F16_128)
+
+extern uint32_t rearranged_twiddle_tab_stride1_arr_64_f16[3];
+extern uint32_t rearranged_twiddle_tab_stride2_arr_64_f16[3];
+extern uint32_t rearranged_twiddle_tab_stride3_arr_64_f16[3];
+extern float16_t rearranged_twiddle_stride1_64_f16[40];
+extern float16_t rearranged_twiddle_stride2_64_f16[40];
+extern float16_t rearranged_twiddle_stride3_64_f16[40];
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_256) || defined(ARM_TABLE_TWIDDLECOEF_F16_512)
+
+extern uint32_t rearranged_twiddle_tab_stride1_arr_256_f16[4];
+extern uint32_t rearranged_twiddle_tab_stride2_arr_256_f16[4];
+extern uint32_t rearranged_twiddle_tab_stride3_arr_256_f16[4];
+extern float16_t rearranged_twiddle_stride1_256_f16[168];
+extern float16_t rearranged_twiddle_stride2_256_f16[168];
+extern float16_t rearranged_twiddle_stride3_256_f16[168];
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_1024) || defined(ARM_TABLE_TWIDDLECOEF_F16_2048)
+
+extern uint32_t rearranged_twiddle_tab_stride1_arr_1024_f16[5];
+extern uint32_t rearranged_twiddle_tab_stride2_arr_1024_f16[5];
+extern uint32_t rearranged_twiddle_tab_stride3_arr_1024_f16[5];
+extern float16_t rearranged_twiddle_stride1_1024_f16[680];
+extern float16_t rearranged_twiddle_stride2_1024_f16[680];
+extern float16_t rearranged_twiddle_stride3_1024_f16[680];
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_4096) || defined(ARM_TABLE_TWIDDLECOEF_F16_8192)
+
+extern uint32_t rearranged_twiddle_tab_stride1_arr_4096_f16[6];
+extern uint32_t rearranged_twiddle_tab_stride2_arr_4096_f16[6];
+extern uint32_t rearranged_twiddle_tab_stride3_arr_4096_f16[6];
+extern float16_t rearranged_twiddle_stride1_4096_f16[2728];
+extern float16_t rearranged_twiddle_stride2_4096_f16[2728];
+extern float16_t rearranged_twiddle_stride3_4096_f16[2728];
+#endif
+
+
+#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*_ARM_MVE_TABLES_F16_H*/
+
--- a/Scripts/genMVETwiddleCoefs.py
+++ b/Scripts/genMVETwiddleCoefs.py
@ -9,7 +9,9 @@ import Tools

 parser = argparse.ArgumentParser(description='Generate C arrays')
 parser.add_argument('-f', nargs='?',type = str, default="../Source/CommonTables/arm_mve_tables.c", help="C File path")
+parser.add_argument('-f16', nargs='?',type = str, default="../Source/CommonTables/arm_mve_tables_f16.c", help="C File path")
 parser.add_argument('-he', nargs='?',type = str, default="../Include/arm_mve_tables.h", help="H File path")
+parser.add_argument('-he16', nargs='?',type = str, default="../Include/arm_mve_tables_f16.h", help="H File path")

 args = parser.parse_args()

@ -323,7 +325,7 @@ def reorderTwiddle(theType,conjugate,f,h,n):

 cheader="""/* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
- * Title:        arm_mve_tables.c
+ * Title:        arm_mve_tables%s.c
 * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
 *               used for MVE implementation only
 *
@ -351,7 +353,7 @@ cheader="""/* ------------------------------------------------------------------

 """ 

-cifdeMVEF="""#include "arm_math.h"
+cifdeMVEF="""#include "arm_math%s.h"

 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

@ -379,7 +381,7 @@ cfooterMVEI="""

 hheader="""/* ----------------------------------------------------------------------
 * Project:      CMSIS DSP Library
- * Title:        arm_mve_tables.h
+ * Title:        arm_mve_tables%s.h
 * Description:  common tables like fft twiddle factors, Bitreverse, reciprocal etc
 *               used for MVE implementation only
 *
@ -405,12 +407,15 @@ hheader="""/* ------------------------------------------------------------------
 * limitations under the License.
 */

- #ifndef _ARM_MVE_TABLES_H
- #define _ARM_MVE_TABLES_H
+ #ifndef _ARM_MVE_TABLES_%sH
+ #define _ARM_MVE_TABLES_%sH

- #include "arm_math.h"
+ #include "arm_math%s.h"

- 
+#ifdef   __cplusplus
+extern "C"
+{
+#endif


 """ 
@ -442,18 +447,41 @@ hfooterMVEI="""
 """

 hfooter="""
-#endif /*_ARM_MVE_TABLES_H*/
+#ifdef   __cplusplus
+}
+#endif
+
+#endif /*_ARM_MVE_TABLES_%sH*/
 """

+# Emit the float16 tables into their own C/H pair (args.f16 / args.he16) so that
+# the non-f16 table files no longer depend on float16 support.
+with open(args.f16,'w') as f:
+  with open(args.he16,'w') as h:
+     print(cheader % "_f16",file=f)
+     print(hheader % ("_f16","F16_","F16_","_f16"),file=h)

+     # Include arm_math_f16.h before testing ARM_FLOAT16_SUPPORTED, as
+     # arm_const_structs_f16.c and arm_bitreversal.c do.
+     # NOTE(review): the macro appears to be provided by that header - confirm.
+     # cifdeMVEF includes the header a second time; its include guard makes that harmless.
+     print('#include "arm_math_f16.h"',file=f)
+     print("#if defined(ARM_FLOAT16_SUPPORTED)",file=f)
+
+     print(cifdeMVEF % "_f16",file=f)
+     print(hifdefMVEF,file=h)
+     # One reordered table set per size, written in ascending order.
+     for n in (16,64,256,1024,4096):
+        reorderTwiddle(F16,False,f,h,n)
+     print(cfooterMVEF,file=f)
+     print(hfooterMVEF,file=h)
+
+     print("#endif /* if defined(ARM_FLOAT16_SUPPORTED) */",file=f)
+
+     print(hfooter % "F16_",file=h)

 with open(args.f,'w') as f:
  with open(args.he,'w') as h:
-     print(cheader,file=f)
-     print(hheader,file=h)
+     print(cheader % "",file=f)
+     print(hheader % ("","","",""),file=h)

    
-     print(cifdeMVEF,file=f)
+     print(cifdeMVEF % "",file=f)
     print(hifdefMVEF,file=h)
     reorderTwiddle(F32,False,f,h,16)
     reorderTwiddle(F32,False,f,h,64)
@ -463,16 +491,6 @@ with open(args.f,'w') as f:
     print(cfooterMVEF,file=f)
     print(hfooterMVEF,file=h)

-     print(cifdeMVEF,file=f)
-     print(hifdefMVEF,file=h)
-     reorderTwiddle(F16,False,f,h,16)
-     reorderTwiddle(F16,False,f,h,64)
-     reorderTwiddle(F16,False,f,h,256)
-     reorderTwiddle(F16,False,f,h,1024)
-     reorderTwiddle(F16,False,f,h,4096)
-     print(cfooterMVEF,file=f)
-     print(hfooterMVEF,file=h)
-
     print(cifdeMVEI,file=f)
     print(hifdefMVEI,file=h)
     reorderTwiddle(Q31,True,f,h,16)
@ -493,14 +511,14 @@ with open(args.f,'w') as f:
     print(cfooterMVEI,file=f)
     print(hfooterMVEI,file=h)

-     print(cifdeMVEI,file=f)
-     print(hifdefMVEI,file=h)
-     reorderTwiddle(Q7,True,f,h,16)
-     reorderTwiddle(Q7,True,f,h,64)
-     reorderTwiddle(Q7,True,f,h,256)
-     reorderTwiddle(Q7,True,f,h,1024)
-     reorderTwiddle(Q7,True,f,h,4096)
-     print(cfooterMVEI,file=f)
-     print(hfooterMVEI,file=h)
-
-     print(hfooter,file=h)
+     #print(cifdeMVEI,file=f)
+     #print(hifdefMVEI,file=h)
+     #reorderTwiddle(Q7,True,f,h,16)
+     #reorderTwiddle(Q7,True,f,h,64)
+     #reorderTwiddle(Q7,True,f,h,256)
+     #reorderTwiddle(Q7,True,f,h,1024)
+     #reorderTwiddle(Q7,True,f,h,4096)
+     #print(cfooterMVEI,file=f)
+     #print(hfooterMVEI,file=h)
+
+     print(hfooter % "",file=h)
--- a/Source/BasicMathFunctions/BasicMathFunctionsF16.c
+++ b/Source/BasicMathFunctions/BasicMathFunctionsF16.c
@ -0,0 +1,36 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        BasicMathFunctionsF16.c
+ * Description:  Combination of all basic math function f16 source files.
+ *
+ * $Date:        20. April 2020
+ * $Revision:    V1.1.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_abs_f16.c"
+#include "arm_add_f16.c"
+#include "arm_dot_prod_f16.c"
+#include "arm_mult_f16.c"
+#include "arm_negate_f16.c"
+#include "arm_offset_f16.c"
+#include "arm_scale_f16.c"
+#include "arm_sub_f16.c"
--- a/Source/BasicMathFunctions/arm_abs_f16.c
+++ b/Source/BasicMathFunctions/arm_abs_f16.c
@ -24,7 +24,7 @@
 * limitations under the License.
 */

-#include "arm_math.h"
+#include "arm_math_f16.h"
 #include <math.h>

 /**
@ -108,6 +108,7 @@ void arm_abs_f16(
 }

 #else
+#if defined(ARM_FLOAT16_SUPPORTED)
 void arm_abs_f16(
  const float16_t * pSrc,
        float16_t * pDst,
@ -115,7 +116,7 @@ void arm_abs_f16(
 {
        uint32_t blkCnt;                               /* Loop counter */

-#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
+#if defined(ARM_MATH_NEON_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
    f16x8_t vec1;
    f16x8_t res;

@ -188,6 +189,7 @@ void arm_abs_f16(
  }

 }
+#endif /* defined(ARM_FLOAT16_SUPPORTED) */
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 /**
  @} end of BasicAbs group
--- a/Source/BasicMathFunctions/arm_add_f16.c
+++ b/Source/BasicMathFunctions/arm_add_f16.c
@ -24,7 +24,7 @@
 * limitations under the License.
 */

-#include "arm_math.h"
+#include "arm_math_f16.h"

 /**
  @ingroup groupMath
@ -109,6 +109,7 @@ void arm_add_f16(
 }

 #else
+#if defined(ARM_FLOAT16_SUPPORTED)
 void arm_add_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
@ -158,6 +159,7 @@ void arm_add_f16(
  }

 }
+#endif /* defined(ARM_FLOAT16_SUPPORTED) */
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
--- a/Source/BasicMathFunctions/arm_dot_prod_f16.c
+++ b/Source/BasicMathFunctions/arm_dot_prod_f16.c
@ -26,7 +26,7 @@
 * limitations under the License.
 */

-#include "arm_math.h"
+#include "arm_math_f16.h"

 /**
  @ingroup groupMath
@ -118,7 +118,7 @@ void arm_dot_prod_f16(
 }

 #else
-
+#if defined(ARM_FLOAT16_SUPPORTED)
 void arm_dot_prod_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
@ -177,7 +177,7 @@ void arm_dot_prod_f16(
  /* Store result in destination buffer */
  *result = sum;
 }
-
+#endif
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 /**
  @} end of BasicDotProd group
--- a/Source/BasicMathFunctions/arm_mult_f16.c
+++ b/Source/BasicMathFunctions/arm_mult_f16.c
@ -24,7 +24,7 @@
 * limitations under the License.
 */

-#include "arm_math.h"
+#include "arm_math_f16.h"

 /**
  @ingroup groupMath
@ -107,6 +107,7 @@ void arm_mult_f16(
 }

 #else
+#if defined(ARM_FLOAT16_SUPPORTED)
 void arm_mult_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
@ -159,6 +160,7 @@ void arm_mult_f16(
  }

 }
+#endif
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
--- a/Source/BasicMathFunctions/arm_negate_f16.c
+++ b/Source/BasicMathFunctions/arm_negate_f16.c
@ -24,7 +24,7 @@
 * limitations under the License.
 */

-#include "arm_math.h"
+#include "arm_math_f16.h"

 /**
  @ingroup groupMath
@ -103,6 +103,7 @@ void arm_negate_f16(
 }

 #else
+#if defined(ARM_FLOAT16_SUPPORTED)
 void arm_negate_f16(
  const float16_t * pSrc,
        float16_t * pDst,
@ -155,6 +156,7 @@ void arm_negate_f16(
  }

 }
+#endif
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
--- a/Source/BasicMathFunctions/arm_offset_f16.c
+++ b/Source/BasicMathFunctions/arm_offset_f16.c
@ -24,7 +24,7 @@
 * limitations under the License.
 */

-#include "arm_math.h"
+#include "arm_math_f16.h"

 /**
  @ingroup groupMath
@ -106,6 +106,7 @@ void arm_offset_f16(
 }

 #else
+#if defined(ARM_FLOAT16_SUPPORTED)
 void arm_offset_f16(
  const float16_t * pSrc,
        float16_t offset,
@ -159,6 +160,7 @@ void arm_offset_f16(
  }

 }
+#endif
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
--- a/Source/BasicMathFunctions/arm_scale_f16.c
+++ b/Source/BasicMathFunctions/arm_scale_f16.c
@ -26,7 +26,7 @@
 * limitations under the License.
 */

-#include "arm_math.h"
+#include "arm_math_f16.h"

 /**
  @ingroup groupMath
@ -122,6 +122,7 @@ void arm_scale_f16(
 }

 #else
+#if defined(ARM_FLOAT16_SUPPORTED)
 void arm_scale_f16(
  const float16_t *pSrc,
        float16_t scale,
@ -174,6 +175,7 @@ void arm_scale_f16(
  }

 }
+#endif
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
--- a/Source/BasicMathFunctions/arm_sub_f16.c
+++ b/Source/BasicMathFunctions/arm_sub_f16.c
@ -26,7 +26,7 @@
 * limitations under the License.
 */

-#include "arm_math.h"
+#include "arm_math_f16.h"

 /**
  @ingroup groupMath
@ -110,6 +110,7 @@ void arm_sub_f16(
 }

 #else
+#if defined(ARM_FLOAT16_SUPPORTED)
 void arm_sub_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
@ -162,6 +163,7 @@ void arm_sub_f16(
  }

 }
+#endif
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
--- a/Source/CMakeLists.txt
+++ b/Source/CMakeLists.txt
@ -20,7 +20,7 @@ option(MATRIXCHECK "Matrix Checks" OFF)
 option(HELIUM "Helium acceleration (MVEF and MVEI supported)" OFF)
 option(MVEF "MVEF intrinsics supported" OFF)
 option(MVEI "MVEI intrinsics supported" OFF)
-option(FLOAT16 "Float16 intrinsics supported" OFF)
+option(MVEFLOAT16 "Float16 MVE intrinsics supported" OFF)

 # Select which parts of the CMSIS-DSP must be compiled.
 # There are some dependencies between the parts but they are not tracked
--- a/Source/CommonTables/CMakeLists.txt
+++ b/Source/CommonTables/CMakeLists.txt
@ -5,7 +5,7 @@ project(CMSISDSPCommon)
 include(configLib)
 include(configDsp)

-add_library(CMSISDSPCommon STATIC arm_common_tables.c)
+add_library(CMSISDSPCommon STATIC arm_common_tables.c arm_common_tables_f16.c)

 configLib(CMSISDSPCommon ${ROOT})
 configDsp(CMSISDSPCommon ${ROOT})
@ -25,6 +25,7 @@ include(interpol)
 interpol(CMSISDSPCommon)

 target_sources(CMSISDSPCommon PRIVATE arm_const_structs.c)
+target_sources(CMSISDSPCommon PRIVATE arm_const_structs_f16.c)


 ### Includes
@ -36,6 +37,7 @@ endif()

 if (HELIUM OR MVEF)
    target_sources(CMSISDSPCommon PRIVATE "${DSP}/Source/CommonTables/arm_mve_tables.c")
+    target_sources(CMSISDSPCommon PRIVATE "${DSP}/Source/CommonTables/arm_mve_tables_f16.c")
 endif()


--- a/Source/CommonTables/CommonTablesF16.c
+++ b/Source/CommonTables/CommonTablesF16.c
@ -0,0 +1,31 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        CommonTablesF16.c
+ * Description:  Combination of all common table source files.
+ *
+ * $Date:        08. January 2020
+ * $Revision:    V1.1.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_common_tables_f16.c"
+#include "arm_const_structs_f16.c"
+#include "arm_mve_tables_f16.c"
--- a/Source/CommonTables/arm_common_tables.c
+++ b/Source/CommonTables/arm_common_tables.c
--- a/Source/CommonTables/arm_common_tables_f16.c
+++ b/Source/CommonTables/arm_common_tables_f16.c
--- a/Source/CommonTables/arm_const_structs.c
+++ b/Source/CommonTables/arm_const_structs.c
@ -95,54 +95,6 @@ const arm_cfft_instance_f64 arm_cfft_sR_f64_len4096 = {
 #if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE)


-/* 
-
-Those structures cannot be used to initialize the MVE version of the FFT F32 instances.
-So they are not compiled when MVE is defined.
-
-For the MVE version, the new arm_cfft_init_f32 must be used.
-
-
-*/
-
-#if !defined(__CC_ARM)
-const arm_cfft_instance_f16 arm_cfft_sR_f16_len16 = {
-  16, twiddleCoefF16_16, armBitRevIndexTable_fixed_16, ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH
-};
-
-const arm_cfft_instance_f16 arm_cfft_sR_f16_len32 = {
-  32, twiddleCoefF16_32, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH
-};
-
-const arm_cfft_instance_f16 arm_cfft_sR_f16_len64 = {
-  64, twiddleCoefF16_64, armBitRevIndexTable_fixed_64, ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH
-};
-
-const arm_cfft_instance_f16 arm_cfft_sR_f16_len128 = {
-  128, twiddleCoefF16_128, armBitRevIndexTable_fixed_128, ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH
-};
-
-const arm_cfft_instance_f16 arm_cfft_sR_f16_len256 = {
-  256, twiddleCoefF16_256, armBitRevIndexTable_fixed_256, ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH
-};
-
-const arm_cfft_instance_f16 arm_cfft_sR_f16_len512 = {
-  512, twiddleCoefF16_512, armBitRevIndexTable_fixed_512, ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH
-};
-
-const arm_cfft_instance_f16 arm_cfft_sR_f16_len1024 = {
-  1024, twiddleCoefF16_1024, armBitRevIndexTable_fixed_1024, ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH
-};
-
-const arm_cfft_instance_f16 arm_cfft_sR_f16_len2048 = {
-  2048, twiddleCoefF16_2048, armBitRevIndexTable_fixed_2048, ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH
-};
-
-const arm_cfft_instance_f16 arm_cfft_sR_f16_len4096 = {
-  4096, twiddleCoefF16_4096, armBitRevIndexTable_fixed_4096, ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH
-};
-#endif 
-
 #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
 const arm_cfft_instance_f32 arm_cfft_sR_f32_len16 = {
  16, twiddleCoef_16, armBitRevIndexTable16, ARMBITREVINDEXTABLE_16_TABLE_LENGTH
--- a/Source/CommonTables/arm_const_structs_f16.c
+++ b/Source/CommonTables/arm_const_structs_f16.c
@ -0,0 +1,101 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_const_structs_f16.c
+ * Description:  Constant structs that are initialized for user convenience.
+ *               For example, some can be given as arguments to the arm_cfft_f16() function.
+ *
+ * $Date:        27. January 2017
+ * $Revision:    V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include "arm_const_structs_f16.h"
+
+
+/*
+ALLOW TABLE is true when config table is enabled and the Transform folder is included
+for compilation.
+*/
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
+
+
+/* Floating-point structs */
+#if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE)
+
+
+/*
+
+Those structures cannot be used to initialize the MVE version of the FFT F16 instances.
+So they are not compiled when MVE is defined.
+
+For the MVE version, arm_cfft_init_f16 must be used.
+
+Each initializer below supplies, in order: fftLen, pTwiddle, pBitRevTable, bitRevLength.
+
+*/
+
+#if !defined(__CC_ARM)
+const arm_cfft_instance_f16 arm_cfft_sR_f16_len16 = {
+  16, twiddleCoefF16_16, armBitRevIndexTable_fixed_16, ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH
+};
+
+const arm_cfft_instance_f16 arm_cfft_sR_f16_len32 = {
+  32, twiddleCoefF16_32, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH
+};
+
+const arm_cfft_instance_f16 arm_cfft_sR_f16_len64 = {
+  64, twiddleCoefF16_64, armBitRevIndexTable_fixed_64, ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH
+};
+
+const arm_cfft_instance_f16 arm_cfft_sR_f16_len128 = {
+  128, twiddleCoefF16_128, armBitRevIndexTable_fixed_128, ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH
+};
+
+const arm_cfft_instance_f16 arm_cfft_sR_f16_len256 = {
+  256, twiddleCoefF16_256, armBitRevIndexTable_fixed_256, ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH
+};
+
+const arm_cfft_instance_f16 arm_cfft_sR_f16_len512 = {
+  512, twiddleCoefF16_512, armBitRevIndexTable_fixed_512, ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH
+};
+
+const arm_cfft_instance_f16 arm_cfft_sR_f16_len1024 = {
+  1024, twiddleCoefF16_1024, armBitRevIndexTable_fixed_1024, ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH
+};
+
+const arm_cfft_instance_f16 arm_cfft_sR_f16_len2048 = {
+  2048, twiddleCoefF16_2048, armBitRevIndexTable_fixed_2048, ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH
+};
+
+const arm_cfft_instance_f16 arm_cfft_sR_f16_len4096 = {
+  4096, twiddleCoefF16_4096, armBitRevIndexTable_fixed_4096, ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH
+};
+#endif /* !defined(__CC_ARM) */
+
+#endif /* !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
+
+#endif /* defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/CommonTables/arm_mve_tables.c
+++ b/Source/CommonTables/arm_mve_tables.c
--- a/Source/CommonTables/arm_mve_tables_f16.c
+++ b/Source/CommonTables/arm_mve_tables_f16.c
--- a/Source/TransformFunctions/TransformFunctionsF16.c
+++ b/Source/TransformFunctions/TransformFunctionsF16.c
@ -0,0 +1,33 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        TransformFunctionsF16.c
+ * Description:  Combination of all transform function f16 source files.
+ *
+ * $Date:        20. April 2020
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_cfft_f16.c"
+#include "arm_cfft_init_f16.c"
+#include "arm_cfft_radix2_f16.c"
+#include "arm_cfft_radix4_f16.c"
+
--- a/Source/TransformFunctions/arm_bitreversal.c
+++ b/Source/TransformFunctions/arm_bitreversal.c
@ -27,6 +27,7 @@
 */

 #include "arm_math.h"
+#include "arm_math_f16.h"
 #include "arm_common_tables.h"

 /*
@ -38,7 +39,7 @@
 * @return none.
 */

-#if !defined(__CC_ARM)
+#if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
 void arm_bitreversal_f16(
 float16_t * pSrc,
 uint16_t fftSize,
--- a/Source/TransformFunctions/arm_cfft_f16.c
+++ b/Source/TransformFunctions/arm_cfft_f16.c
@ -26,15 +26,15 @@
 * limitations under the License.
 */

-#include "arm_math.h"
-#include "arm_common_tables.h"
+#include "arm_math_f16.h"
+#include "arm_common_tables_f16.h"


 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

 #include "arm_helium_utils.h"
 #include "arm_vec_fft.h"
-#include "arm_mve_tables.h"
+#include "arm_mve_tables_f16.h"


 static float16_t arm_inverse_fft_length_f16(uint16_t fftLen)
@ -641,6 +641,8 @@ void arm_cfft_f16(

 #else

+#if defined(ARM_FLOAT16_SUPPORTED)
+
 extern void arm_bitreversal_16(
        uint16_t * pSrc,
  const uint16_t bitRevLen,
@ -892,6 +894,7 @@ void arm_cfft_f16(
        }
    }
 }
+#endif /* if defined(ARM_FLOAT16_SUPPORTED) */
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
--- a/Source/TransformFunctions/arm_cfft_init_f16.c
+++ b/Source/TransformFunctions/arm_cfft_init_f16.c
@ -49,15 +49,15 @@
                variables declared in arm_const_structs.h
 */

-#include "arm_math.h"
-#include "arm_common_tables.h"
-#include "arm_const_structs.h"
+#include "arm_math_f16.h"
+#include "arm_common_tables_f16.h"
+#include "arm_const_structs_f16.h"


 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)

 #include "arm_vec_fft.h"
-#include "arm_mve_tables.h"
+#include "arm_mve_tables_f16.h"

 arm_status arm_cfft_radix4by2_rearrange_twiddles_f16(arm_cfft_instance_f16 *S, int twidCoefModifier)
 {
@ -257,6 +257,9 @@ arm_status arm_cfft_init_f16(
        return (status);     
 }
 #else
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
 arm_status arm_cfft_init_f16(
  arm_cfft_instance_f16 * S,
  uint16_t fftLen)
@ -347,6 +350,7 @@ arm_status arm_cfft_init_f16(

        return (status);
 }
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */

 /**
--- a/Source/TransformFunctions/arm_cfft_radix2_f16.c
+++ b/Source/TransformFunctions/arm_cfft_radix2_f16.c
@ -26,7 +26,9 @@
 * limitations under the License.
 */

-#include "arm_math.h"
+#include "arm_math_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)

 void arm_radix2_butterfly_f16(
        float16_t * pSrc,
@ -470,3 +472,4 @@ float16_t onebyfftLen)
 }


+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/TransformFunctions/arm_cfft_radix4_f16.c
+++ b/Source/TransformFunctions/arm_cfft_radix4_f16.c
@ -26,7 +26,9 @@
 * limitations under the License.
 */

-#include "arm_math.h"
+#include "arm_math_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)

 extern void arm_bitreversal_f16(
        float16_t * pSrc,
@ -1267,4 +1269,4 @@ float16_t onebyfftLen)
 #endif /* #if defined (ARM_MATH_DSP) */
 }

-
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/configDsp.cmake
+++ b/Source/configDsp.cmake
@ -26,8 +26,8 @@ if (NEON OR NEONEXPERIMENTAL)
    target_include_directories(${project} PRIVATE "${root}/CMSIS/DSP/ComputeLibrary/Include")
 endif()

-if (FLOAT16)
-    target_compile_definitions(${project} PRIVATE ARM_MATH_FLOAT16) 
+if (MVEFLOAT16)
+    target_compile_definitions(${project} PRIVATE ARM_MATH_MVE_FLOAT16) 
 endif()

 if (HELIUM OR MVEF OR SUPPORT)
--- a/Source/fft.cmake
+++ b/Source/fft.cmake
@ -86,6 +86,92 @@ else()
 endif()
 endif()

+#######################################
+#
+# CFFT F16
+#
+# For each length N (16, 32, ..., 4096), when both CONFIGTABLE and
+# CFFT_F16_<N> are set:
+#   - ARM_TABLE_TWIDDLECOEF_F16_<N> is defined, and
+#   - ARM_TABLE_BITREVIDX_FXT_<N> is defined when HELIUM or MVEF is set,
+#     otherwise ARM_TABLE_BITREVIDX_FLT_<N> is defined.
+# All definitions are added as PUBLIC compile definitions of ${PROJECT}.
+#
+# NOTE(review): the non-MVE arm_cfft_sR_f16_len<N> instances reference
+# armBitRevIndexTable_fixed_<N>; confirm that the FLT_<N> define used in the
+# else() branches below really enables that table (FXT_<N> may be required).
+#
+
+
+if (CONFIGTABLE AND CFFT_F16_16)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_16)
+if (HELIUM OR MVEF)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FXT_16)
+else()
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_16)
+endif()
+endif()
+
+if (CONFIGTABLE AND CFFT_F16_32)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_32)
+if (HELIUM OR MVEF)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FXT_32)
+else()
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_32)
+endif()
+endif()
+
+if (CONFIGTABLE AND CFFT_F16_64)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_64)
+if (HELIUM OR MVEF)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FXT_64)
+else()
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_64)
+endif()
+endif()
+
+if (CONFIGTABLE AND CFFT_F16_128)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_128)
+if (HELIUM OR MVEF)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FXT_128)
+else()
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_128)
+endif()
+endif()
+
+if (CONFIGTABLE AND CFFT_F16_256)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_256)
+if (HELIUM OR MVEF)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FXT_256)
+else()
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_256)
+endif()
+endif()
+
+if (CONFIGTABLE AND CFFT_F16_512)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_512)
+if (HELIUM OR MVEF)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FXT_512)
+else()
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_512)
+endif()
+endif()
+
+if (CONFIGTABLE AND CFFT_F16_1024)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_1024)
+if (HELIUM OR MVEF)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FXT_1024)
+else()
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_1024)
+endif()
+endif()
+
+if (CONFIGTABLE AND CFFT_F16_2048)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_2048)
+if (HELIUM OR MVEF)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FXT_2048)
+else()
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_2048)
+endif()
+endif()
+
+if (CONFIGTABLE AND CFFT_F16_4096)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_4096)
+if (HELIUM OR MVEF)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FXT_4096)
+else()
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_4096)
+endif()
+endif()

 #######################################
 #
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@ -221,7 +221,7 @@ set(TESTSRC
  Source/Tests/ExampleCategoryQ7.cpp
  )

-if (NOT ARMAC5)
+if ((NOT ARMAC5) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL)))
 set(TESTSRC16 
  Source/Tests/BasicTestsF16.cpp
  Source/Tests/TransformCF16.cpp
--- a/Testing/FrameworkInclude/Error.h
+++ b/Testing/FrameworkInclude/Error.h
@ -28,10 +28,12 @@
 #ifndef _ASSERT_H_
 #define _ASSERT_H_
 #include "arm_math.h"
+#include "arm_math_f16.h"
 #include <exception>
 #include "Test.h"
 #include "Pattern.h"

+
 #define UNKNOWN_ERROR 1
 #define EQUAL_ERROR 2
 #define NEAR_EQUAL_ERROR 3
@ -79,7 +81,7 @@ to get the line number.
 (SNR functions to finish implementing)

 */
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 extern void assert_relative_error(unsigned long nb,float16_t &a, float16_t &b, double threshold);
 extern void assert_relative_error(unsigned long nb,AnyPattern<float16_t> &pa, AnyPattern<float16_t> &pb, double threshold);
 #endif
@ -97,7 +99,7 @@ extern void assert_close_error(unsigned long nb,AnyPattern<float64_t> &pref, Any
 extern void assert_close_error(unsigned long nb,float32_t &ref, float32_t &val, double absthreshold, double relthreshold);
 extern void assert_close_error(unsigned long nb,AnyPattern<float32_t> &pref, AnyPattern<float32_t> &pval, double absthreshold, double relthreshold);

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 extern void assert_close_error(unsigned long nb,float16_t &ref, float16_t &val, double absthreshold, double relthreshold);
 extern void assert_close_error(unsigned long nb,AnyPattern<float16_t> &pref, AnyPattern<float16_t> &pval, double absthreshold, double relthreshold);
 #endif
@ -105,7 +107,7 @@ extern void assert_close_error(unsigned long nb,AnyPattern<float16_t> &pref, Any
 extern void assert_snr_error(unsigned long nb,AnyPattern<float64_t> &pa,AnyPattern<float64_t> &pb, float64_t threshold);
 extern void assert_snr_error(unsigned long nb,AnyPattern<float32_t> &pa,AnyPattern<float32_t> &pb, float32_t threshold);

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 extern void assert_snr_error(unsigned long nb,AnyPattern<float16_t> &pa,AnyPattern<float16_t> &pb, float32_t threshold);
 #endif

@ -117,7 +119,7 @@ extern void assert_snr_error(unsigned long nb,AnyPattern<q7_t> &pa,AnyPattern<q7
 extern void assert_snr_error(unsigned long nb,float64_t pa,float64_t pb, float32_t threshold);
 extern void assert_snr_error(unsigned long nb,float32_t pa,float32_t pb, float32_t threshold);

-#if !defined (__CC_ARM)
+#if !defined (__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
 extern void assert_snr_error(unsigned long nb,float16_t pa,float16_t pb, float32_t threshold);
 #endif 

@ -132,7 +134,7 @@ extern void assert_false(unsigned long nb,bool cond);
 extern void assert_not_empty(unsigned long nb, AnyPattern<float64_t> &p);
 extern void assert_not_empty(unsigned long nb, AnyPattern<float32_t> &p);

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 extern void assert_not_empty(unsigned long nb, AnyPattern<float16_t> &p);
 #endif 

--- a/Testing/FrameworkInclude/FPGA.h
+++ b/Testing/FrameworkInclude/FPGA.h
@ -29,6 +29,8 @@
 #define _FPGA_H_
 #include <string>
 #include "stdlib.h"
+#include "arm_math.h"
+#include "arm_math_f16.h"

 namespace Client
 {
@ -60,7 +62,7 @@ FPGA driver. Used to read a C array describing how to drive the test.

      virtual void ImportPattern_f64(Testing::PatternID_t,char*,Testing::nbSamples_t nb);
      virtual void ImportPattern_f32(Testing::PatternID_t,char*,Testing::nbSamples_t nb);
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
      virtual void ImportPattern_f16(Testing::PatternID_t,char*,Testing::nbSamples_t nb);
 #endif   
      virtual void ImportPattern_q63(Testing::PatternID_t,char*,Testing::nbSamples_t nb);
@ -78,7 +80,7 @@ FPGA driver. Used to read a C array describing how to drive the test.

      virtual void DumpPattern_f64(Testing::outputID_t,Testing::nbSamples_t nb, float64_t* data);
      virtual void DumpPattern_f32(Testing::outputID_t,Testing::nbSamples_t nb, float32_t* data);
-#if !defined( __CC_ARM )    
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
      virtual void DumpPattern_f16(Testing::outputID_t,Testing::nbSamples_t nb, float16_t* data);
 #endif     
      virtual void DumpPattern_q63(Testing::outputID_t,Testing::nbSamples_t nb, q63_t* data);
--- a/Testing/FrameworkInclude/Pattern.h
+++ b/Testing/FrameworkInclude/Pattern.h
@ -30,6 +30,8 @@

 #include "Test.h"
 #include "Pattern.h"
+#include "arm_math.h"
+#include "arm_math_f16.h"

 namespace Client {

@ -45,7 +47,7 @@ float64_t *loadPattern(Testing::PatternID_t id, PatternMgr *mgr,Testing::nbSampl
 template <>
 float32_t *loadPattern(Testing::PatternID_t id, PatternMgr *mgr,Testing::nbSamples_t &nb, Testing::nbSamples_t maxSamples);

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 template <>
 float16_t *loadPattern(Testing::PatternID_t id, PatternMgr *mgr,Testing::nbSamples_t &nb, Testing::nbSamples_t maxSamples);
 #endif
@ -83,7 +85,7 @@ float64_t *localPattern(Testing::nbSamples_t nb, PatternMgr *mgr);
 template <>
 float32_t *localPattern(Testing::nbSamples_t nb, PatternMgr *mgr);

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 template <>
 float16_t *localPattern(Testing::nbSamples_t nb, PatternMgr *mgr);
 #endif
@ -111,7 +113,7 @@ uint8_t *localPattern(Testing::nbSamples_t nb, PatternMgr *mgr);

 extern void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t nb,float64_t* data,PatternMgr *mgr);
 extern void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t,float32_t*,PatternMgr *);
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 extern void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t,float16_t*,PatternMgr *);
 #endif
 extern void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t,q63_t*,PatternMgr *);
--- a/Testing/FrameworkInclude/Semihosting.h
+++ b/Testing/FrameworkInclude/Semihosting.h
@ -30,6 +30,9 @@
 #include <string>
 #include <memory>
 #include <stdio.h>
+#include "arm_math.h"
+#include "arm_math_f16.h"
+

 namespace Client
 {
@ -64,7 +67,7 @@ Semihosting driver. Used to read a text file describing how to drive the test.
      
      virtual void ImportPattern_f64(Testing::PatternID_t,char*,Testing::nbSamples_t nb=0);
      virtual void ImportPattern_f32(Testing::PatternID_t,char*,Testing::nbSamples_t nb=0);
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
      virtual void ImportPattern_f16(Testing::PatternID_t,char*,Testing::nbSamples_t nb=0);
 #endif
      virtual void ImportPattern_q63(Testing::PatternID_t,char*,Testing::nbSamples_t nb=0);
@ -83,7 +86,7 @@ Semihosting driver. Used to read a text file describing how to drive the test.

      virtual void DumpPattern_f64(Testing::outputID_t,Testing::nbSamples_t nb, float64_t*);
      virtual void DumpPattern_f32(Testing::outputID_t,Testing::nbSamples_t nb, float32_t*);
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
      virtual void DumpPattern_f16(Testing::outputID_t,Testing::nbSamples_t nb, float16_t*);
 #endif
      virtual void DumpPattern_q63(Testing::outputID_t,Testing::nbSamples_t nb, q63_t*);
--- a/Testing/FrameworkInclude/Test.h
+++ b/Testing/FrameworkInclude/Test.h
@ -33,6 +33,8 @@
 #include <queue>
 #include <cstdio>
 #include "arm_math.h"
+#include "arm_math_f16.h"
+

 // This special value means no limit on the number of samples.
 // It is used when importing patterns and we want to read
@ -298,7 +300,7 @@ API of Memory managers used in the test framework
      */
      virtual void ImportPattern_f64(Testing::PatternID_t,char*,Testing::nbSamples_t nb=MAX_NB_SAMPLES)=0;
      virtual void ImportPattern_f32(Testing::PatternID_t,char*,Testing::nbSamples_t nb=MAX_NB_SAMPLES)=0;
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
      virtual void ImportPattern_f16(Testing::PatternID_t,char*,Testing::nbSamples_t nb=MAX_NB_SAMPLES)=0;
 #endif
      virtual void ImportPattern_q63(Testing::PatternID_t,char*,Testing::nbSamples_t nb=MAX_NB_SAMPLES)=0;
@ -330,7 +332,7 @@ API of Memory managers used in the test framework
      */
      virtual void DumpPattern_f64(Testing::outputID_t,Testing::nbSamples_t nb, float64_t*)=0;
      virtual void DumpPattern_f32(Testing::outputID_t,Testing::nbSamples_t nb, float32_t*)=0;
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
      virtual void DumpPattern_f16(Testing::outputID_t,Testing::nbSamples_t nb, float16_t*)=0;
 #endif
      virtual void DumpPattern_q63(Testing::outputID_t,Testing::nbSamples_t nb, q63_t*)=0;
@ -412,7 +414,7 @@ public:
    */
    float64_t *load_f64(Testing::PatternID_t,Testing::nbSamples_t&,Testing::nbSamples_t maxSamples=MAX_NB_SAMPLES);
    float32_t *load_f32(Testing::PatternID_t,Testing::nbSamples_t&,Testing::nbSamples_t maxSamples=MAX_NB_SAMPLES);
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
    float16_t *load_f16(Testing::PatternID_t,Testing::nbSamples_t&,Testing::nbSamples_t maxSamples=MAX_NB_SAMPLES);
 #endif
    q63_t *load_q63(Testing::PatternID_t,Testing::nbSamples_t&,Testing::nbSamples_t maxSamples=MAX_NB_SAMPLES);
@ -432,7 +434,7 @@ public:
    */
    float64_t *local_f64(Testing::nbSamples_t);
    float32_t *local_f32(Testing::nbSamples_t);
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
    float16_t *local_f16(Testing::nbSamples_t);
 #endif
    q63_t *local_q63(Testing::nbSamples_t);
@ -449,7 +451,7 @@ public:
    */
    void dumpPattern_f64(Testing::outputID_t,Testing::nbSamples_t,float64_t*);
    void dumpPattern_f32(Testing::outputID_t,Testing::nbSamples_t,float32_t*);
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
    void dumpPattern_f16(Testing::outputID_t,Testing::nbSamples_t,float16_t*);
 #endif
    
--- a/Testing/FrameworkInclude/Timing.h
+++ b/Testing/FrameworkInclude/Timing.h
@ -3,6 +3,8 @@

 #include "Test.h"
 #include "arm_math.h"
+#include "arm_math_f16.h"
+
 void initCycleMeasurement();
 void cycleMeasurementStart();
 void cycleMeasurementStop();
--- a/Testing/FrameworkSource/Error.cpp
+++ b/Testing/FrameworkSource/Error.cpp
@ -29,6 +29,7 @@
 #include <stdio.h>
 #include "Error.h"
 #include "arm_math.h"
+#include "arm_math_f16.h"

 namespace Client {

@ -68,7 +69,7 @@ void assert_near_equal(unsigned long nb,float32_t pa, float32_t pb, float32_t th
    }
 };

-#if !defined (__CC_ARM)
+#if !defined (__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
 template <> 
 void assert_near_equal(unsigned long nb,float16_t pa, float16_t pb, float16_t threshold)
 {
@ -135,7 +136,7 @@ void assert_not_empty(unsigned long nb, AnyPattern<float32_t> &p)
  assert_not_empty_generic(nb,p);
 }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 void assert_not_empty(unsigned long nb, AnyPattern<float16_t> &p)
 {
  assert_not_empty_generic(nb,p);
@ -217,7 +218,7 @@ void assert_relative_error(unsigned long nb,float32_t &a, float32_t &b, double t
    }
 };

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 void assert_relative_error(unsigned long nb,float16_t &a, float16_t &b, double threshold)
 {
    double rel,delta,average;
@ -301,7 +302,7 @@ void assert_relative_error(unsigned long nb,AnyPattern<float32_t> &pa, AnyPatter
    }
 };

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 void assert_relative_error(unsigned long nb,AnyPattern<float16_t> &pa, AnyPattern<float16_t> &pb, double threshold)
 {
    ASSERT_NOT_EMPTY(pa);
@ -420,7 +421,7 @@ void assert_close_error(unsigned long nb,AnyPattern<float32_t> &pref, AnyPattern
    }
 };

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 void assert_close_error(unsigned long nb,float16_t &ref, float16_t &val, double absthreshold,double relthreshold)
 {
    
@ -528,7 +529,7 @@ float arm_snr_f32(float *pRef, float *pTest, uint32_t buffSize)

 }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 float arm_snr_f16(float16_t *pRef, float16_t *pTest, uint32_t buffSize)
 {
  float EnergySignal = 0.0, EnergyError = 0.0;
@ -761,7 +762,7 @@ void assert_snr_error(unsigned long nb,float32_t a,float32_t b, float32_t thresh
   }
 }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 void assert_snr_error(unsigned long nb,AnyPattern<float16_t> &pa,AnyPattern<float16_t> &pb, float32_t threshold)
 {
   float32_t snr;
@ -790,7 +791,7 @@ void assert_snr_error(unsigned long nb,AnyPattern<float16_t> &pa,AnyPattern<floa
 }
 #endif

-#if !defined (__CC_ARM)
+#if !defined (__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
 void assert_snr_error(unsigned long nb,float16_t a,float16_t b, float32_t threshold)
 {
   float32_t snr;
--- a/Testing/FrameworkSource/FPGA.cpp
+++ b/Testing/FrameworkSource/FPGA.cpp
@ -37,6 +37,8 @@
 #include <stdio.h>
 #include <string.h>
 #include "Generators.h"
+#include "arm_math.h"
+#include "arm_math_f16.h"

 namespace Client
 {
@ -540,7 +542,7 @@ namespace Client

    }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
    void FPGA::ImportPattern_f16(Testing::PatternID_t id,char* p,Testing::nbSamples_t nb)
    {
        unsigned long offset,i;
@ -741,7 +743,7 @@ namespace Client
        }
    }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
    void FPGA::DumpPattern_f16(Testing::outputID_t id,Testing::nbSamples_t nb, float16_t* data)
    {
        std::string fileName = this->getOutputPath(id); 
--- a/Testing/FrameworkSource/Pattern.cpp
+++ b/Testing/FrameworkSource/Pattern.cpp
@ -30,6 +30,8 @@
 */
 #include "Test.h"
 #include "Pattern.h"
+#include "arm_math.h"
+#include "arm_math_f16.h"

 namespace Client {

@ -45,7 +47,7 @@ float32_t *loadPattern(Testing::PatternID_t id, Client::PatternMgr *mgr,Testing:
    return(mgr->load_f32(id,nb,maxSamples));
 }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 template <> 
 float16_t *loadPattern(Testing::PatternID_t id, Client::PatternMgr *mgr,Testing::nbSamples_t &nb, Testing::nbSamples_t maxSamples)
 {
@ -108,7 +110,7 @@ float32_t *localPattern(Testing::PatternID_t id, Client::PatternMgr *mgr)
    return(mgr->local_f32(id));
 }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 template <> 
 float16_t *localPattern(Testing::PatternID_t id, Client::PatternMgr *mgr)
 {
@ -168,7 +170,7 @@ void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t nbSamples,float32_t
  mgr->dumpPattern_f32(id,nbSamples,data);
 }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t nbSamples,float16_t* data,PatternMgr *mgr)
 {
  mgr->dumpPattern_f16(id,nbSamples,data);
--- a/Testing/FrameworkSource/PatternMgr.cpp
+++ b/Testing/FrameworkSource/PatternMgr.cpp
@ -29,6 +29,8 @@
 * limitations under the License.
 */
 #include "Test.h"
+#include "arm_math.h"
+#include "arm_math_f16.h"

 namespace Client
 {
@ -46,7 +48,7 @@ TYPE *PatternMgr::local_##EXT(Testing::nbSamples_t nbSamples) \

 LOCAL(float64_t,f64)
 LOCAL(float32_t,f32)
-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 LOCAL(float16_t,f16)
 #endif
 LOCAL(q63_t,q63)
@ -94,7 +96,7 @@ float32_t *PatternMgr::load_f32(Testing::PatternID_t id,Testing::nbSamples_t& nb
   
 }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 float16_t *PatternMgr::load_f16(Testing::PatternID_t id,Testing::nbSamples_t& nbSamples,Testing::nbSamples_t maxSamples)
 {
    nbSamples=m_io->GetPatternSize(id);
@ -245,7 +247,7 @@ void PatternMgr::dumpPattern_f32(Testing::outputID_t id,Testing::nbSamples_t nbS
   m_io->DumpPattern_f32(id,nbSamples,data);
 }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
 void PatternMgr::dumpPattern_f16(Testing::outputID_t id,Testing::nbSamples_t nbSamples,float16_t* data)
 {
   m_io->DumpPattern_f16(id,nbSamples,data);
--- a/Testing/FrameworkSource/Semihosting.cpp
+++ b/Testing/FrameworkSource/Semihosting.cpp
@ -37,6 +37,8 @@
 #include <stdlib.h>
 #include "Generators.h"
 #include "Semihosting.h"
+#include "arm_math.h"
+#include "arm_math_f16.h"


 namespace Client
@ -668,7 +670,7 @@ namespace Client
          
      }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
      void Semihosting::ImportPattern_f16(Testing::PatternID_t id,char* p,Testing::nbSamples_t nb)
      {
          char tmp[256];
@ -1015,7 +1017,7 @@ namespace Client
            }
      }

-#if !defined( __CC_ARM )
+#if !defined( __CC_ARM ) && defined(ARM_FLOAT16_SUPPORTED)
      void Semihosting::DumpPattern_f16(Testing::outputID_t id,Testing::nbSamples_t nb, float16_t* data)
      {
            std::string fileName = this->getOutputPath(id);
--- a/Testing/Include/Tests/TransformCF16.h
+++ b/Testing/Include/Tests/TransformCF16.h
@ -1,5 +1,6 @@
 #include "Test.h"
 #include "Pattern.h"
+#include "arm_math_f16.h"
 class TransformCF16:public Client::Suite
    {
        public:
--- a/Testing/Source/Tests/BasicTestsF16.cpp
+++ b/Testing/Source/Tests/BasicTestsF16.cpp
@ -2,6 +2,8 @@
 #include <stdio.h>
 #include "Error.h"

+#include "arm_math_f16.h"
+
 #define SNR_THRESHOLD 62
 #define SNR_DOTPROD_THRESHOLD 40

--- a/Testing/Source/Tests/TransformCF16.cpp
+++ b/Testing/Source/Tests/TransformCF16.cpp
@ -1,8 +1,7 @@
+#include "arm_math_f16.h"
 #include "TransformCF16.h"
 #include <stdio.h>
 #include "Error.h"
-#include "arm_math.h"
-#include "arm_const_structs.h"
 #include "Test.h"

 #define SNR_THRESHOLD 58
--- a/Testing/Source/Tests/TransformCF32.cpp
+++ b/Testing/Source/Tests/TransformCF32.cpp
@ -2,7 +2,6 @@
 #include <stdio.h>
 #include "Error.h"
 #include "arm_math.h"
-#include "arm_const_structs.h"
 #include "Test.h"

 #define SNR_THRESHOLD 120
--- a/Testing/Source/Tests/TransformCF64.cpp
+++ b/Testing/Source/Tests/TransformCF64.cpp
@ -2,7 +2,6 @@
 #include <stdio.h>
 #include "Error.h"
 #include "arm_math.h"
-#include "arm_const_structs.h"
 #include "Test.h"

 #define SNR_THRESHOLD 250
--- a/Testing/Source/Tests/TransformCQ15.cpp
+++ b/Testing/Source/Tests/TransformCQ15.cpp
@ -2,7 +2,6 @@
 #include <stdio.h>
 #include "Error.h"
 #include "arm_math.h"
-#include "arm_const_structs.h"
 #include "Test.h"

 #define SNR_THRESHOLD 30
--- a/Testing/Source/Tests/TransformCQ31.cpp
+++ b/Testing/Source/Tests/TransformCQ31.cpp
@ -2,7 +2,6 @@
 #include <stdio.h>
 #include "Error.h"
 #include "arm_math.h"
-#include "arm_const_structs.h"
 #include "Test.h"

 #define SNR_THRESHOLD 90
--- a/Testing/Source/Tests/TransformRF32.cpp
+++ b/Testing/Source/Tests/TransformRF32.cpp
@ -2,7 +2,6 @@
 #include <stdio.h>
 #include "Error.h"
 #include "arm_math.h"
-#include "arm_const_structs.h"
 #include "Test.h"


--- a/Testing/Source/Tests/TransformRF64.cpp
+++ b/Testing/Source/Tests/TransformRF64.cpp
@ -2,7 +2,6 @@
 #include <stdio.h>
 #include "Error.h"
 #include "arm_math.h"
-#include "arm_const_structs.h"
 #include "Test.h"


--- a/Testing/Source/Tests/TransformRQ15.cpp
+++ b/Testing/Source/Tests/TransformRQ15.cpp
@ -2,7 +2,6 @@
 #include <stdio.h>
 #include "Error.h"
 #include "arm_math.h"
-#include "arm_const_structs.h"
 #include "Test.h"


--- a/Testing/Source/Tests/TransformRQ31.cpp
+++ b/Testing/Source/Tests/TransformRQ31.cpp
@ -2,7 +2,6 @@
 #include <stdio.h>
 #include "Error.h"
 #include "arm_math.h"
-#include "arm_const_structs.h"
 #include "Test.h"


--- a/Toolchain/AC5.cmake
+++ b/Toolchain/AC5.cmake
@ -20,7 +20,7 @@ function(compilerSpecificCompileOptions PROJECTNAME ROOT)
  #cmake_print_variables(${PROJECTNAME} DISABLEHALF DISABLEOPTIM)
  # Add support for the type __fp16 even if there is no HW
  # support for it. But support disabled when building boot code
-  if (NOT DISABLEHALF)
+  if ((NOT DISABLEHALF) AND (FLOAT16))
  target_compile_options(${PROJECTNAME} PRIVATE "--fp16_format=alternative")
  endif()
  
--- a/Toolchain/GCC.cmake
+++ b/Toolchain/GCC.cmake
@ -14,7 +14,9 @@ function(compilerSpecificCompileOptions PROJECTNAME ROOT)

  # Add support for the type __fp16 even if there is no HW
  # support for it.
+  if (FLOAT16)
  target_compile_options(${PROJECTNAME} PUBLIC "-mfp16-format=alternative")
+  endif()

  if ((OPTIMIZED) AND (NOT DISABLEOPTIM))
    target_compile_options(${PROJECTNAME} PUBLIC "-O2")
--- a/configCore.cmake
+++ b/configCore.cmake
@ -7,6 +7,7 @@ SET(CORTEXM ON)
 option(HARDFP "Hard floating point" ON)
 option(LITTLEENDIAN "Little endian" ON)
 option(FASTMATHCOMPUTATIONS "Fast Math enabled" OFF)
+option(FLOAT16 "Scalar float16 supported" OFF)

 # More detailed identification for benchmark results
 SET(COREID ARMCM7)