CMSIS-DSP: Added arm_rfft_fast_f16

6 years ago · 8b465544a1
parent 3129c4e1c2
commit 8b465544a1
14 changed files with 6120 additions and 2 deletions
--- a/Include/arm_common_tables_f16.h
+++ b/Include/arm_common_tables_f16.h
@ -75,6 +75,40 @@ extern "C"
  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_4096)
    extern const float16_t twiddleCoefF16_4096[8192];
  #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) */
+  /* Twiddle-factor tables for the f16 real FFT, named twiddleCoefF16_rfft_<N>.
+     One table of N float16_t values is declared per length N (32 ... 4096).
+     Each declaration is visible when tables are not user-configured
+     (ARM_DSP_CONFIG_TABLES undefined), when ARM_ALL_FFT_TABLES is defined, or
+     when its own ARM_TABLE_TWIDDLECOEF_RFFT_F16_<N> macro is defined. */
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_32)
+  extern const float16_t twiddleCoefF16_rfft_32[32];
+  #endif /* ARM_TABLE_TWIDDLECOEF_RFFT_F16_32 */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_64)
+  extern const float16_t twiddleCoefF16_rfft_64[64];
+  #endif /* ARM_TABLE_TWIDDLECOEF_RFFT_F16_64 */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_128)
+  extern const float16_t twiddleCoefF16_rfft_128[128];
+  #endif /* ARM_TABLE_TWIDDLECOEF_RFFT_F16_128 */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_256)
+  extern const float16_t twiddleCoefF16_rfft_256[256];
+  #endif /* ARM_TABLE_TWIDDLECOEF_RFFT_F16_256 */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_512)
+  extern const float16_t twiddleCoefF16_rfft_512[512];
+  #endif /* ARM_TABLE_TWIDDLECOEF_RFFT_F16_512 */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_1024)
+  extern const float16_t twiddleCoefF16_rfft_1024[1024];
+  #endif /* ARM_TABLE_TWIDDLECOEF_RFFT_F16_1024 */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_2048)
+  extern const float16_t twiddleCoefF16_rfft_2048[2048];
+  #endif /* ARM_TABLE_TWIDDLECOEF_RFFT_F16_2048 */
+
+  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_4096)
+  extern const float16_t twiddleCoefF16_rfft_4096[4096];
+  #endif /* ARM_TABLE_TWIDDLECOEF_RFFT_F16_4096 */
+
  #endif /* ARMAC5 */
    
 #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
--- a/Include/dsp/transform_functions_f16.h
+++ b/Include/dsp/transform_functions_f16.h
@ -99,6 +99,27 @@ extern "C"
        float16_t * p1,
        uint8_t ifftFlag,
        uint8_t bitReverseFlag);
+
+  /**
+   * @brief Instance structure for the half-precision floating-point (f16) fast RFFT/RIFFT functions.
+   */
+typedef struct
+  {
+          arm_cfft_instance_f16 Sint;      /**< Internal CFFT structure (its fftLen field sets the loop count of the real split/merge stages). */
+          uint16_t fftLenRFFT;             /**< length of the real sequence */
+    const float16_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
+  } arm_rfft_fast_instance_f16 ;
+  /**
+   * @brief         Initialization function for an f16 fast real FFT instance.
+   * @param[in,out] S       points to the arm_rfft_fast_instance_f16 structure to set up
+   * @param[in]     fftLen  FFT length; presumably the length of the real sequence
+   *                        (one of 32 ... 4096) - confirm against the implementation
+   * @return        execution status (arm_status)
+   */
+arm_status arm_rfft_fast_init_f16 (
+         arm_rfft_fast_instance_f16 * S,
+         uint16_t fftLen);
+  /**
+   * @brief         Processing function for the f16 floating-point real FFT / inverse real FFT.
+   * @param[in]     S         points to an arm_rfft_fast_instance_f16 structure
+   * @param[in,out] p         points to the input buffer; the forward path runs the complex FFT on it
+   * @param[out]    pOut      points to the output buffer
+   * @param[in]     ifftFlag  0 selects the forward RFFT, any other value the inverse RFFT
+   */
+  void arm_rfft_fast_f16(
+        const arm_rfft_fast_instance_f16 * S,
+        float16_t * p, float16_t * pOut,
+        uint8_t ifftFlag);
+  
 #endif /* defined(ARM_FLOAT16_SUPPORTED)*/

 #ifdef   __cplusplus
--- a/Source/CMakeLists.txt
+++ b/Source/CMakeLists.txt
@ -135,6 +135,15 @@ option(RFFT_F64_512         "rfft f64 512"                      OFF)
 option(RFFT_F64_2048        "rfft f64 2048"                     OFF)
 option(RFFT_F64_8192        "rfft f64 8192"                     OFF)

+# Per-length build options for the f16 fast real FFT (32 ... 4096 samples); all default to OFF.
+option(RFFT_FAST_F16_32     "rfft fast f16 32"                  OFF)
+option(RFFT_FAST_F16_64     "rfft fast f16 64"                  OFF)
+option(RFFT_FAST_F16_128    "rfft fast f16 128"                 OFF)
+option(RFFT_FAST_F16_256    "rfft fast f16 256"                 OFF)
+option(RFFT_FAST_F16_512    "rfft fast f16 512"                 OFF)
+option(RFFT_FAST_F16_1024   "rfft fast f16 1024"                OFF)
+option(RFFT_FAST_F16_2048   "rfft fast f16 2048"                OFF)
+option(RFFT_FAST_F16_4096   "rfft fast f16 4096"                OFF)
+
 option(RFFT_Q31_32          "rfft q31 32"                       OFF)
 option(RFFT_Q31_64          "rfft q31 64"                       OFF)
 option(RFFT_Q31_128         "rfft q31 128"                      OFF)
--- a/Source/CommonTables/arm_common_tables_f16.c
+++ b/Source/CommonTables/arm_common_tables_f16.c
--- a/Source/TransformFunctions/CMakeLists.txt
+++ b/Source/TransformFunctions/CMakeLists.txt
@ -112,7 +112,15 @@ target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_f64.c)
 target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_init_f64.c)
 endif()

-
+# f16 fast real FFT and the f16 complex FFT sources listed with it.
+# They are built when tables are not restricted (NOT CONFIGTABLE), when all
+# FFTs are requested (ALLFFT), or when at least one RFFT_FAST_F16_<N> length
+# option is enabled.  The selection must use the F16 options, not the F32
+# ones, so that enabling an f16 length with CONFIGTABLE actually adds these files.
+if (NOT CONFIGTABLE OR ALLFFT OR RFFT_FAST_F16_32 OR RFFT_FAST_F16_64 OR RFFT_FAST_F16_128
+   OR RFFT_FAST_F16_256 OR RFFT_FAST_F16_512 OR RFFT_FAST_F16_1024 OR RFFT_FAST_F16_2048
+   OR RFFT_FAST_F16_4096 )
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_f16.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_init_f16.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_f16.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_f16.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix8_f16.c)
+endif()

 if (NOT CONFIGTABLE OR ALLFFT OR RFFT_F32_128 OR RFFT_F32_512 OR RFFT_F32_2048 OR RFFT_F32_8192)
 target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_f32.c)
--- a/Source/TransformFunctions/TransformFunctionsF16.c
+++ b/Source/TransformFunctions/TransformFunctionsF16.c
@ -30,4 +30,6 @@
 #include "arm_cfft_init_f16.c"
 #include "arm_cfft_radix2_f16.c"
 #include "arm_cfft_radix4_f16.c"
-
+#include "arm_rfft_fast_init_f16.c"
+#include "arm_rfft_fast_f16.c"
+#include "arm_cfft_radix8_f16.c"
--- a/Source/TransformFunctions/arm_cfft_radix8_f16.c
+++ b/Source/TransformFunctions/arm_cfft_radix8_f16.c
@ -0,0 +1,287 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cfft_radix8_f16.c
+ * Description:  Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/transform_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+/* ----------------------------------------------------------------------
+ * Internal helper function used by the FFTs
+ * -------------------------------------------------------------------- */
+
+/**
+  brief         Core function for the floating-point (f16) radix-8 CFFT butterfly process.
+                The buffer is transformed in place, one radix-8 stage per pass of the outer loop.
+  param[in,out] pSrc             points to the in-place buffer of floating-point data type.
+                                 It is accessed as pSrc[2*i] and pSrc[2*i + 1], i.e. two values per
+                                 point (presumably interleaved real and imaginary parts).
+  param[in]     fftLen           length of the FFT.
+  param[in]     pCoef            points to the twiddle coefficient buffer, read in pairs
+                                 pCoef[2*ia] (co*) and pCoef[2*ia + 1] (si*).
+  param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+  return        none
+  note          NOTE(review): the span is divided by 8 on every stage, so fftLen is presumably
+                expected to be a power of 8 - confirm against the callers.
+*/
+
+void arm_radix8_butterfly_f16(
+  float16_t * pSrc,
+  uint16_t fftLen,
+  const float16_t * pCoef,
+  uint16_t twidCoefModifier)
+{
+   uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
+   uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
+   uint32_t id;
+   uint32_t n1, n2, j;
+
+   float16_t r1, r2, r3, r4, r5, r6, r7, r8;
+   float16_t t1, t2;
+   float16_t s1, s2, s3, s4, s5, s6, s7, s8;
+   float16_t p1, p2, p3, p4;
+   float16_t co2, co3, co4, co5, co6, co7, co8;
+   float16_t si2, si3, si4, si5, si6, si7, si8;
+   const float16_t C81 = 0.70710678118f;   /* approximately 1/sqrt(2) */
+
+   n2 = fftLen;
+   /* One stage per pass: n1 is the span of a butterfly group, n2 = n1/8 the distance between the 8 inputs of a butterfly. */
+   do
+   {
+      n1 = n2;
+      n2 = n2 >> 3;
+      i1 = 0;
+      /* Butterflies starting at i1 = 0, n1, 2*n1, ...: results are stored without any twiddle multiplication. */
+      do
+      {
+         i2 = i1 + n2;
+         i3 = i2 + n2;
+         i4 = i3 + n2;
+         i5 = i4 + n2;
+         i6 = i5 + n2;
+         i7 = i6 + n2;
+         i8 = i7 + n2;
+         r1 = pSrc[2 * i1] + pSrc[2 * i5];
+         r5 = pSrc[2 * i1] - pSrc[2 * i5];
+         r2 = pSrc[2 * i2] + pSrc[2 * i6];
+         r6 = pSrc[2 * i2] - pSrc[2 * i6];
+         r3 = pSrc[2 * i3] + pSrc[2 * i7];
+         r7 = pSrc[2 * i3] - pSrc[2 * i7];
+         r4 = pSrc[2 * i4] + pSrc[2 * i8];
+         r8 = pSrc[2 * i4] - pSrc[2 * i8];
+         t1 = r1 - r3;
+         r1 = r1 + r3;
+         r3 = r2 - r4;
+         r2 = r2 + r4;
+         pSrc[2 * i1] = r1 + r2;
+         pSrc[2 * i5] = r1 - r2;
+         r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
+         s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
+         r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
+         s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
+         s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
+         s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
+         r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
+         s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
+         t2 = r1 - s3;
+         r1 = r1 + s3;
+         s3 = r2 - r4;
+         r2 = r2 + r4;
+         pSrc[2 * i1 + 1] = r1 + r2;
+         pSrc[2 * i5 + 1] = r1 - r2;
+         pSrc[2 * i3]     = t1 + s3;
+         pSrc[2 * i7]     = t1 - s3;
+         pSrc[2 * i3 + 1] = t2 - r3;
+         pSrc[2 * i7 + 1] = t2 + r3;
+         r1 = (r6 - r8) * C81;
+         r6 = (r6 + r8) * C81;
+         r2 = (s6 - s8) * C81;
+         s6 = (s6 + s8) * C81;
+         t1 = r5 - r1;
+         r5 = r5 + r1;
+         r8 = r7 - r6;
+         r7 = r7 + r6;
+         t2 = s5 - r2;
+         s5 = s5 + r2;
+         s8 = s7 - s6;
+         s7 = s7 + s6;
+         pSrc[2 * i2]     = r5 + s7;
+         pSrc[2 * i8]     = r5 - s7;
+         pSrc[2 * i6]     = t1 + s8;
+         pSrc[2 * i4]     = t1 - s8;
+         pSrc[2 * i2 + 1] = s5 - r7;
+         pSrc[2 * i8 + 1] = s5 + r7;
+         pSrc[2 * i6 + 1] = t2 - r8;
+         pSrc[2 * i4 + 1] = t2 + r8;
+
+         i1 += n1;
+      } while (i1 < fftLen);
+      /* Once n2 drops under 8 the twiddled butterflies below are skipped; the break also leaves the stage loop. */
+      if (n2 < 8)
+         break;
+      /* Remaining butterflies: start offsets j = 1 .. n2-1, each offset with its own seven twiddle pairs. */
+      ia1 = 0;
+      j = 1;
+
+      do
+      {
+         /*  index calculation for the coefficients */
+         id  = ia1 + twidCoefModifier;
+         ia1 = id;
+         ia2 = ia1 + id;
+         ia3 = ia2 + id;
+         ia4 = ia3 + id;
+         ia5 = ia4 + id;
+         ia6 = ia5 + id;
+         ia7 = ia6 + id;
+         /* ia1..ia7 are 1x..7x the current index id; co* is read from the even entry, si* from the odd entry that follows. */
+         co2 = pCoef[2 * ia1];
+         co3 = pCoef[2 * ia2];
+         co4 = pCoef[2 * ia3];
+         co5 = pCoef[2 * ia4];
+         co6 = pCoef[2 * ia5];
+         co7 = pCoef[2 * ia6];
+         co8 = pCoef[2 * ia7];
+         si2 = pCoef[2 * ia1 + 1];
+         si3 = pCoef[2 * ia2 + 1];
+         si4 = pCoef[2 * ia3 + 1];
+         si5 = pCoef[2 * ia4 + 1];
+         si6 = pCoef[2 * ia5 + 1];
+         si7 = pCoef[2 * ia6 + 1];
+         si8 = pCoef[2 * ia7 + 1];
+
+         i1 = j;
+
+         do
+         {
+            /*  index calculation for the input */
+            i2 = i1 + n2;
+            i3 = i2 + n2;
+            i4 = i3 + n2;
+            i5 = i4 + n2;
+            i6 = i5 + n2;
+            i7 = i6 + n2;
+            i8 = i7 + n2;
+            r1 = pSrc[2 * i1] + pSrc[2 * i5];
+            r5 = pSrc[2 * i1] - pSrc[2 * i5];
+            r2 = pSrc[2 * i2] + pSrc[2 * i6];
+            r6 = pSrc[2 * i2] - pSrc[2 * i6];
+            r3 = pSrc[2 * i3] + pSrc[2 * i7];
+            r7 = pSrc[2 * i3] - pSrc[2 * i7];
+            r4 = pSrc[2 * i4] + pSrc[2 * i8];
+            r8 = pSrc[2 * i4] - pSrc[2 * i8];
+            t1 = r1 - r3;
+            r1 = r1 + r3;
+            r3 = r2 - r4;
+            r2 = r2 + r4;
+            pSrc[2 * i1] = r1 + r2;
+            r2 = r1 - r2;
+            s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
+            s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
+            s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
+            s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
+            s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
+            s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
+            s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
+            s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
+            t2 = s1 - s3;
+            s1 = s1 + s3;
+            s3 = s2 - s4;
+            s2 = s2 + s4;
+            r1 = t1 + s3;
+            t1 = t1 - s3;
+            pSrc[2 * i1 + 1] = s1 + s2;
+            s2 = s1 - s2;
+            s1 = t2 - r3;
+            t2 = t2 + r3;
+            p1 = co5 * r2;
+            p2 = si5 * s2;
+            p3 = co5 * s2;
+            p4 = si5 * r2;
+            pSrc[2 * i5]     = p1 + p2;   /* co*r + si*s : first component of (r + j*s) * (co - j*si) */
+            pSrc[2 * i5 + 1] = p3 - p4;   /* co*s - si*r : second component; same pattern for every output below */
+            p1 = co3 * r1;
+            p2 = si3 * s1;
+            p3 = co3 * s1;
+            p4 = si3 * r1;
+            pSrc[2 * i3]     = p1 + p2;
+            pSrc[2 * i3 + 1] = p3 - p4;
+            p1 = co7 * t1;
+            p2 = si7 * t2;
+            p3 = co7 * t2;
+            p4 = si7 * t1;
+            pSrc[2 * i7]     = p1 + p2;
+            pSrc[2 * i7 + 1] = p3 - p4;
+            r1 = (r6 - r8) * C81;
+            r6 = (r6 + r8) * C81;
+            s1 = (s6 - s8) * C81;
+            s6 = (s6 + s8) * C81;
+            t1 = r5 - r1;
+            r5 = r5 + r1;
+            r8 = r7 - r6;
+            r7 = r7 + r6;
+            t2 = s5 - s1;
+            s5 = s5 + s1;
+            s8 = s7 - s6;
+            s7 = s7 + s6;
+            r1 = r5 + s7;
+            r5 = r5 - s7;
+            r6 = t1 + s8;
+            t1 = t1 - s8;
+            s1 = s5 - r7;
+            s5 = s5 + r7;
+            s6 = t2 - r8;
+            t2 = t2 + r8;
+            p1 = co2 * r1;
+            p2 = si2 * s1;
+            p3 = co2 * s1;
+            p4 = si2 * r1;
+            pSrc[2 * i2]     = p1 + p2;
+            pSrc[2 * i2 + 1] = p3 - p4;
+            p1 = co8 * r5;
+            p2 = si8 * s5;
+            p3 = co8 * s5;
+            p4 = si8 * r5;
+            pSrc[2 * i8]     = p1 + p2;
+            pSrc[2 * i8 + 1] = p3 - p4;
+            p1 = co6 * r6;
+            p2 = si6 * s6;
+            p3 = co6 * s6;
+            p4 = si6 * r6;
+            pSrc[2 * i6]     = p1 + p2;
+            pSrc[2 * i6 + 1] = p3 - p4;
+            p1 = co4 * t1;
+            p2 = si4 * t2;
+            p3 = co4 * t2;
+            p4 = si4 * t1;
+            pSrc[2 * i4]     = p1 + p2;
+            pSrc[2 * i4 + 1] = p3 - p4;
+
+            i1 += n1;
+         } while (i1 < fftLen);
+
+         j++;
+      } while (j < n2);
+      /* The next stage steps 8x further through the twiddle table. */
+      twidCoefModifier <<= 3;
+   } while (n2 > 7);
+}
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/TransformFunctions/arm_rfft_fast_f16.c
+++ b/Source/TransformFunctions/arm_rfft_fast_f16.c
@ -0,0 +1,609 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_rfft_fast_f16.c
+ * Description:  RFFT & RIFFT Floating point process function
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/transform_functions_f16.h"
+#include "arm_common_tables_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+/*
+ * Real FFT extraction stage, MVE (Helium) version.
+ *
+ * Reads the complex spectrum held in p (two float16_t values per bin, as left
+ * by the in-place CFFT in arm_rfft_fast_f16) and writes the packed real-FFT
+ * spectrum to pOut.  The first output pair is p[0] + p[1] and p[0] - p[1].
+ * The remaining (S->Sint).fftLen - 1 bins are combined in pairs: pA walks
+ * forward from bin 1 while pB walks backward from the last bin.  Eight pairs
+ * are processed per vector iteration, the rest one at a time.
+ *
+ *   S     instance; only Sint.fftLen and pTwiddleRFFT are read here
+ *   p     spectrum to convert (read only by this function)
+ *   pOut  destination of the packed spectrum
+ */
+void stage_rfft_f16(
+  const arm_rfft_fast_instance_f16 * S,
+        float16_t * p,
+        float16_t * pOut)
+{
+        uint32_t  k;                                /* Loop Counter */
+        float16_t twR, twI;                         /* RFFT Twiddle coefficients */
+  const float16_t * pCoeff = S->pTwiddleRFFT;       /* Points to RFFT Twiddle factors */
+        float16_t *pA = p;                          /* increasing pointer */
+        float16_t *pB = p;                          /* decreasing pointer */
+        float16_t xAR, xAI, xBR, xBI;               /* temporary variables */
+        float16_t t1a, t1b;                         /* temporary variables */
+        float16_t p0, p1, p2, p3;                   /* temporary variables */
+
+        float16x8x2_t tw,xA,xB;
+        float16x8x2_t tmp1, tmp2, res;
+
+        uint16x8_t     vecStridesBkwd;
+        /* Decrementing element offsets 14, 12, ..., 0: the gather loads below use them to fetch eight pB bins in reverse order. */
+        vecStridesBkwd = vddupq_u16((uint16_t)14, 2);
+
+
+        int blockCnt;
+
+
+   k = (S->Sint).fftLen - 1;                        /* number of bins left after the first one */
+
+   /* Pack first and last sample of the frequency domain together */
+
+   xBR = pB[0];                                     /* pA == pB == p here, so xB and xA are the same bin */
+   xBI = pB[1];
+   xAR = pA[0];
+   xAI = pA[1];
+
+   twR = *pCoeff++ ;                                /* first twiddle pair is only skipped: twR/twI are not used for this bin */
+   twI = *pCoeff++ ;
+
+   // U1 = XA(1) + XB(1); % It is real
+   t1a = xBR + xAR  ;
+
+   // U2 = XB(1) - XA(1); % It is imaginary
+   t1b = xBI + xAI  ;
+
+   // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
+   // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
+   *pOut++ = 0.5f * ( t1a + t1b );
+   *pOut++ = 0.5f * ( t1a - t1b );
+
+   // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
+   pB  = p + 2*k - 14;                              /* block of the last 8 bins: pB[14], pB[15] is the last bin */
+   pA += 2;
+
+   blockCnt = k >> 3;                               /* vector iterations, 8 bin pairs each */
+   while (blockCnt > 0)
+   {
+      /*
+         function X = my_split_rfft(X, ifftFlag)
+         % X is a series of real numbers
+         L  = length(X);
+         XC = X(1:2:end) +i*X(2:2:end);
+         XA = fft(XC);
+         XB = conj(XA([1 end:-1:2]));
+         TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
+         for l = 2:L/2
+            XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
+         end
+         XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
+         X = XA;
+      */
+
+
+      xA = vld2q_f16(pA);                           /* de-interleave 8 bins: val[0] even entries, val[1] odd entries */
+      pA += 16;
+
+      xB = vld2q_f16(pB);
+      /* NOTE(review): both fields of xB are overwritten by the gather loads below, so the vld2q above looks redundant. */
+      xB.val[0] = vldrhq_gather_shifted_offset_f16(pB, vecStridesBkwd);
+      xB.val[1] = vldrhq_gather_shifted_offset_f16(&pB[1], vecStridesBkwd);
+
+      xB.val[1] = vnegq_f16(xB.val[1]);             /* negate the second component: xB = conj(...) as in the formula above */
+      pB -= 16;
+
+
+      tw = vld2q_f16(pCoeff);
+      pCoeff += 16;
+
+
+      tmp1.val[0] = vaddq_f16(xA.val[0],xB.val[0]); /* tmp1 = xA + xB */
+      tmp1.val[1] = vaddq_f16(xA.val[1],xB.val[1]);
+
+      tmp2.val[0] = vsubq_f16(xB.val[0],xA.val[0]); /* tmp2 = xB - xA */
+      tmp2.val[1] = vsubq_f16(xB.val[1],xA.val[1]);
+
+      res.val[0] = vmulq(tw.val[0], tmp2.val[0]);   /* res = tw * tmp2 (complex product) */
+      res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);
+
+      res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
+      res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);
+
+      res.val[0] = vaddq_f16(res.val[0],tmp1.val[0] );
+      res.val[1] = vaddq_f16(res.val[1],tmp1.val[1] );
+
+      res.val[0] = vmulq_n_f16(res.val[0], 0.5f);
+      res.val[1] = vmulq_n_f16(res.val[1], 0.5f);
+
+
+      vst2q_f16(pOut, res);                         /* re-interleave and store 8 output bins */
+      pOut += 16;
+
+    
+      blockCnt--;
+   } 
+
+   pB += 14;                                        /* back to single-bin addressing for the scalar tail */
+   blockCnt = k & 7;                                /* bins not covered by the vector loop */
+   while (blockCnt > 0)
+   {
+      /*
+         function X = my_split_rfft(X, ifftFlag)
+         % X is a series of real numbers
+         L  = length(X);
+         XC = X(1:2:end) +i*X(2:2:end);
+         XA = fft(XC);
+         XB = conj(XA([1 end:-1:2]));
+         TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
+         for l = 2:L/2
+            XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
+         end
+         XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
+         X = XA;
+      */
+
+      xBI = pB[1];
+      xBR = pB[0];
+      xAR = pA[0];
+      xAI = pA[1];
+
+      twR = *pCoeff++;
+      twI = *pCoeff++;
+
+      t1a = xBR - xAR ;
+      t1b = xBI + xAI ;                             /* xBI is read un-negated here, hence the sum */
+
+      // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
+      // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
+      p0 = twR * t1a;
+      p1 = twI * t1a;
+      p2 = twR * t1b;
+      p3 = twI * t1b;
+
+      *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
+      *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
+
+      pA += 2;
+      pB -= 2;
+      blockCnt--;
+   }
+}
+
+/* Prepares data for inverse cfft (MVE / Helium version).
+ *
+ * Reads the packed real-FFT spectrum in p and writes to pOut the complex data
+ * on which arm_rfft_fast_f16 then runs the inverse CFFT.  The first output pair
+ * is 0.5*(p[0] + p[1]) and 0.5*(p[0] - p[1]).  The remaining
+ * (S->Sint).fftLen - 1 bins are combined in pairs, pA walking forward from
+ * bin 1 and pB backward from the last bin: eight pairs per vector iteration,
+ * the rest one at a time.
+ *
+ *   S     instance; only Sint.fftLen and pTwiddleRFFT are read here
+ *   p     packed spectrum (read only by this function)
+ *   pOut  destination buffer
+ */
+void merge_rfft_f16(
+  const arm_rfft_fast_instance_f16 * S,
+        float16_t * p,
+        float16_t * pOut)
+{
+        uint32_t  k;                                /* Loop Counter */
+        float16_t twR, twI;                         /* RFFT Twiddle coefficients */
+  const float16_t *pCoeff = S->pTwiddleRFFT;        /* Points to RFFT Twiddle factors */
+        float16_t *pA = p;                          /* increasing pointer */
+        float16_t *pB = p;                          /* decreasing pointer */
+        float16_t xAR, xAI, xBR, xBI;               /* temporary variables */
+        float16_t t1a, t1b, r, s, t, u;             /* temporary variables */
+
+        float16x8x2_t tw,xA,xB;
+        float16x8x2_t tmp1, tmp2, res;
+        uint16x8_t     vecStridesBkwd;
+        /* Decrementing element offsets 14, 12, ..., 0: the gather loads below use them to fetch eight pB bins in reverse order. */
+        vecStridesBkwd = vddupq_u16((uint16_t)14, 2);
+
+        int blockCnt;
+        
+
+   k = (S->Sint).fftLen - 1;                        /* number of bins left after the first one */
+
+   xAR = pA[0];
+   xAI = pA[1];
+
+   pCoeff += 2 ;                                    /* the first twiddle pair is not used */
+
+   *pOut++ = 0.5f * ( xAR + xAI );
+   *pOut++ = 0.5f * ( xAR - xAI );
+
+   pB  =  p + 2*k - 14;                             /* block of the last 8 bins: pB[14], pB[15] is the last bin */
+   pA +=  2    ;
+
+   blockCnt = k >> 3;                               /* vector iterations, 8 bin pairs each */
+   while (blockCnt > 0)
+   {
+      /* G is half of the frequency complex spectrum */
+      //for k = 2:N
+      //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
+      xA = vld2q_f16(pA);                           /* de-interleave 8 bins: val[0] even entries, val[1] odd entries */
+      pA += 16;
+
+      xB = vld2q_f16(pB);
+      /* NOTE(review): both fields of xB are overwritten by the gather loads below, so the vld2q above looks redundant. */
+      xB.val[0] = vldrhq_gather_shifted_offset_f16(pB, vecStridesBkwd);
+      xB.val[1] = vldrhq_gather_shifted_offset_f16(&pB[1], vecStridesBkwd);
+
+      xB.val[1] = vnegq_f16(xB.val[1]);             /* negate the second component: conj(G(N-k+2)) in the formula above */
+      pB -= 16;
+
+
+      tw = vld2q_f16(pCoeff);
+      tw.val[1] = vnegq_f16(tw.val[1]);             /* second twiddle component is negated here, unlike in stage_rfft_f16 */
+      pCoeff += 16;
+
+
+      tmp1.val[0] = vaddq_f16(xA.val[0],xB.val[0]); /* tmp1 = xA + xB */
+      tmp1.val[1] = vaddq_f16(xA.val[1],xB.val[1]);
+
+      tmp2.val[0] = vsubq_f16(xB.val[0],xA.val[0]); /* tmp2 = xB - xA */
+      tmp2.val[1] = vsubq_f16(xB.val[1],xA.val[1]);
+
+      res.val[0] = vmulq(tw.val[0], tmp2.val[0]);   /* res = tw * tmp2 (complex product) */
+      res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]);
+
+      res.val[1] = vmulq(tw.val[0], tmp2.val[1]);
+      res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]);
+
+      res.val[0] = vaddq_f16(res.val[0],tmp1.val[0] );
+      res.val[1] = vaddq_f16(res.val[1],tmp1.val[1] );
+
+      res.val[0] = vmulq_n_f16(res.val[0], 0.5f);
+      res.val[1] = vmulq_n_f16(res.val[1], 0.5f);
+
+
+      vst2q_f16(pOut, res);                         /* re-interleave and store 8 output bins */
+      pOut += 16;
+
+    
+      blockCnt--;
+   }
+
+   pB += 14;                                        /* back to single-bin addressing for the scalar tail */
+   blockCnt = k & 7;                                /* bins not covered by the vector loop */
+   while (blockCnt > 0)
+   {
+      /* G is half of the frequency complex spectrum */
+      //for k = 2:N
+      //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
+      xBI =   pB[1]    ;
+      xBR =   pB[0]    ;
+      xAR =  pA[0];
+      xAI =  pA[1];
+
+      twR = *pCoeff++;
+      twI = *pCoeff++;
+
+      t1a = xAR - xBR ;
+      t1b = xAI + xBI ;                             /* xBI is read un-negated here, hence the sum */
+
+      r = twR * t1a;
+      s = twI * t1b;
+      t = twI * t1a;
+      u = twR * t1b;
+
+      // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
+      // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
+      *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
+      *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
+
+      pA += 2;
+      pB -= 2;
+      blockCnt--;
+   }
+
+}
+#else
+/*
+ * Real FFT extraction stage (scalar version).
+ *
+ * Converts the complex spectrum stored in p (two float16_t values per bin)
+ * into the packed real-FFT spectrum written to pOut.
+ *
+ *   S     instance; Sint.fftLen gives the number of bins, pTwiddleRFFT the twiddles
+ *   p     spectrum to convert (only read here)
+ *   pOut  destination of the packed spectrum
+ *
+ * With XB(l) = conj(XA(mirror of l)), each bin l >= 2 (1-based) is
+ *    XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)))
+ * and the first bin packs the two purely real values into one pair.
+ */
+void stage_rfft_f16(
+  const arm_rfft_fast_instance_f16 * S,
+        float16_t * p,
+        float16_t * pOut)
+{
+   const float16_t *pTw       = S->pTwiddleRFFT;        /* twiddle cursor                */
+   uint32_t         remaining = (S->Sint).fftLen - 1;   /* bins after the first one      */
+   float16_t       *pFwd      = p;                      /* walks forward through p       */
+   float16_t       *pBwd      = p;                      /* walks backward through p      */
+   float16_t        aRe, aIm, bRe, bIm;                 /* current forward/backward bins */
+   float16_t        twRe, twIm;                         /* current twiddle pair          */
+   float16_t        reTerm, imTerm;                     /* combined components           */
+   float16_t        m0, m1, m2, m3;                     /* twiddle products              */
+
+   /* First bin: both cursors still point at p, so the two reads see the same values. */
+   bRe = pBwd[0];
+   bIm = pBwd[1];
+   aRe = pFwd[0];
+   aIm = pFwd[1];
+
+   /* The first twiddle pair is consumed but not used for this bin. */
+   twRe = pTw[0];
+   twIm = pTw[1];
+   pTw += 2;
+
+   reTerm = bRe + aRe;
+   imTerm = bIm + aIm;
+
+   pOut[0] = 0.5f * ( reTerm + imTerm );
+   pOut[1] = 0.5f * ( reTerm - imTerm );
+   pOut += 2;
+
+   /* Pair bin 1 with the last bin, bin 2 with the one before it, and so on. */
+   pBwd  = p + 2*remaining;
+   pFwd += 2;
+
+   /* The body always runs at least once, exactly like the original do/while. */
+   do
+   {
+      bIm = pBwd[1];
+      bRe = pBwd[0];
+      aRe = pFwd[0];
+      aIm = pFwd[1];
+
+      twRe = pTw[0];
+      twIm = pTw[1];
+      pTw += 2;
+
+      /* The backward bin is used conjugated: its second component enters with a + sign. */
+      reTerm = bRe - aRe;
+      imTerm = bIm + aIm;
+
+      m0 = twRe * reTerm;
+      m1 = twIm * reTerm;
+      m2 = twRe * imTerm;
+      m3 = twIm * imTerm;
+
+      pOut[0] = 0.5f * (aRe + bRe + m0 + m3 );
+      pOut[1] = 0.5f * (aIm - bIm + m1 - m2 );
+      pOut += 2;
+
+      pFwd += 2;
+      pBwd -= 2;
+   } while (--remaining > 0U);
+}
+
+/*
+ * Prepares data for the inverse CFFT (scalar version).
+ *
+ * Reads the packed real-FFT spectrum in p and writes to pOut the complex data
+ * on which arm_rfft_fast_f16 then runs the inverse CFFT.
+ *
+ *   S     instance; Sint.fftLen gives the number of bins, pTwiddleRFFT the twiddles
+ *   p     packed spectrum (only read here)
+ *   pOut  destination buffer
+ *
+ * With G the packed half spectrum, each bin k >= 2 (1-based) is
+ *    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k) * (G(k) - conj(G(N-k+2))))
+ */
+void merge_rfft_f16(
+  const arm_rfft_fast_instance_f16 * S,
+        float16_t * p,
+        float16_t * pOut)
+{
+   const float16_t *pTw   = S->pTwiddleRFFT;        /* twiddle cursor                */
+   uint32_t         count = (S->Sint).fftLen - 1;   /* bins after the first one      */
+   float16_t       *pFwd  = p;                      /* walks forward through p       */
+   float16_t       *pBwd  = p + 2*count;            /* walks backward from last bin  */
+   float16_t        aRe, aIm, bRe, bIm;             /* current forward/backward bins */
+   float16_t        twRe, twIm;                     /* current twiddle pair          */
+   float16_t        dRe, sIm;                       /* combined components           */
+   float16_t        prodA, prodB, prodC, prodD;     /* twiddle products              */
+
+   /* First bin: unpack the two values stored in the first pair. */
+   aRe = pFwd[0];
+   aIm = pFwd[1];
+   pFwd += 2;
+
+   /* The first twiddle pair is not used. */
+   pTw += 2;
+
+   pOut[0] = 0.5f * ( aRe + aIm );
+   pOut[1] = 0.5f * ( aRe - aIm );
+   pOut += 2;
+
+   for (; count > 0U; count--)
+   {
+      bIm = pBwd[1];
+      bRe = pBwd[0];
+      aRe = pFwd[0];
+      aIm = pFwd[1];
+
+      twRe = pTw[0];
+      twIm = pTw[1];
+      pTw += 2;
+
+      /* The backward bin is used conjugated: its second component enters with a + sign. */
+      dRe = aRe - bRe;
+      sIm = aIm + bIm;
+
+      prodA = twRe * dRe;
+      prodB = twIm * sIm;
+      prodC = twIm * dRe;
+      prodD = twRe * sIm;
+
+      pOut[0] = 0.5f * (aRe + bRe - prodA - prodB );
+      pOut[1] = 0.5f * (aIm - bIm + prodC - prodD );
+      pOut += 2;
+
+      pFwd += 2;
+      pBwd -= 2;
+   }
+
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+  @ingroup groupTransforms
+*/
+
+/**
+  @defgroup RealFFT Real FFT Functions
+ 
+  @par
+                   The CMSIS DSP library includes specialized algorithms for computing the
+                   FFT of real data sequences.  The FFT is defined over complex data but
+                   in many applications the input is real.  Real FFT algorithms take advantage
+                   of the symmetry properties of the FFT and have a speed advantage over complex
+                   algorithms of the same length.
+  @par
+                   The Fast RFFT algorithm relies on the mixed radix CFFT, which saves processor usage.
+  @par
+                   The real length N forward FFT of a sequence is computed using the steps shown below.
+  @par
+                   \image html RFFT.gif "Real Fast Fourier Transform"
+  @par
+                   The real sequence is initially treated as if it were complex to perform a CFFT.
+                   Later, a processing stage reshapes the data to obtain half of the frequency spectrum
+                   in complex format. Except the first complex number that contains the two real numbers
+                   X[0] and X[N/2] all the data is complex. In other words, the first complex sample
+                   contains two real values packed.
+  @par
+                   The input for the inverse RFFT should keep the same format as the output of the
+                   forward RFFT. A first processing stage pre-processes the data to later perform an
+                   inverse CFFT.
+  @par
+                   \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
+  @par
+                   The algorithms for floating-point, Q15, and Q31 data are slightly different
+                   and we describe each algorithm in turn.
+  @par           Floating-point
+                   The main functions are \ref arm_rfft_fast_f16() and \ref arm_rfft_fast_init_f16().
+                   The older functions \ref arm_rfft_f16() and \ref arm_rfft_init_f16() have been deprecated
+                   but are still documented.
+  @par
+                   The FFT of a real N-point sequence has even symmetry in the frequency domain. 
+                   The second half of the data equals the conjugate of the first half flipped in frequency. 
+                   Looking at the data, we see that we can uniquely represent the FFT using only N/2 complex numbers.
+                   These are packed into the output array in alternating real and imaginary components:
+  @par
+                   X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ...
+                   real[(N/2)-1], imag[(N/2)-1] }
+  @par
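+                   It happens that the first complex number (real[0], imag[0]) of the spectrum is
+                   actually all real: real[0] represents the DC offset, and imag[0] would be 0.
+                   As described above, the packed format therefore uses the imag[0] slot to hold
+                   the real value X[N/2] instead.
+                   (real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is
+                   the first harmonic and so on.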
+  @par
+                   The real FFT functions pack the frequency domain data in this fashion.
+                   The forward transform outputs the data in this form and the inverse
+                   transform expects input data in this form. The function always performs
+                   the needed bitreversal so that the input and output data is always in
+                   normal order. The functions support lengths of [32, 64, 128, ..., 4096]
+                   samples.
+  @par           Q15 and Q31
+                   The real algorithms are defined in a similar manner and utilize N/2 complex
+                   transforms behind the scenes.
+  @par
+                   The complex transforms used internally include scaling to prevent fixed-point
+                   overflows.  The overall scaling equals 1/(fftLen/2).
+                   Due to the use of complex transform internally, the source buffer is
+                   modified by the rfft.
+  @par
+                   A separate instance structure must be defined for each transform used but
+                   twiddle factor and bit reversal tables can be reused.
+  @par
+                   There is also an associated initialization function for each data type.
+                   The initialization function performs the following operations:
+                    - Sets the values of the internal structure fields.
+                    - Initializes twiddle factor table and bit reversal table pointers.
+                    - Initializes the internal complex FFT data structure.
+  @par
+                   Use of the initialization function is optional **except for MVE versions where it is mandatory**.
+                   If you don't use the initialization functions, then the structures should be initialized with code
+                   similar to the one below:
+  <pre>
+      arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
+      arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
+  </pre>
+                   where <code>fftLenReal</code> is the length of the real transform;
+                   <code>fftLenBy2</code> length of  the internal complex transform (fftLenReal/2).
+                   <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
+                   <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
+                   output (=1).
+                   <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
+                   The value is based on the FFT length;
+                   <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
+                   <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
+                   <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
+                   must also be initialized.  
+@par
+                   Note that with MVE versions you can't initialize instance structures directly and **must
+                   use the initialization function**.
+ */
+
+/**
+  @addtogroup RealFFT
+  @{
+*/
+
+/**
+  @brief         Processing function for the floating-point real FFT.
+  @param[in]     S         points to an arm_rfft_fast_instance_f16 structure
+  @param[in,out] p         points to input buffer (Source buffer is modified by this function.)
+  @param[out]    pOut      points to output buffer
+  @param[in]     ifftFlag
+                   - value = 0: RFFT
+                   - value = 1: RIFFT
+  @return        none
+
+  @par
+                   Forward path: the complex FFT is run on p, then the extraction stage
+                   writes the packed spectrum to pOut.
+                   Inverse path: the merge stage writes its result to pOut, then the
+                   inverse complex FFT is run on pOut.
+*/
+
+void arm_rfft_fast_f16(
+  const arm_rfft_fast_instance_f16 * S,
+  float16_t * p,
+  float16_t * pOut,
+  uint8_t ifftFlag)
+{
+   const arm_cfft_instance_f16 * pCfft = &(S->Sint);
+
+   if (!ifftFlag)
+   {
+      /* Forward transform: complex FFT on the input, bit reversal enabled */
+      arm_cfft_f16( pCfft, p, ifftFlag, 1);
+
+      /* Real FFT extraction into the output buffer */
+      stage_rfft_f16(S, p, pOut);
+      return;
+   }
+
+   /* Inverse transform: prepare the data for the inverse complex FFT */
+   merge_rfft_f16(S, p, pOut);
+
+   /* Inverse complex FFT on the output buffer, bit reversal enabled */
+   arm_cfft_f16( pCfft, pOut, ifftFlag, 1);
+}
+
+/**
+* @} end of RealFFT group
+*/
+
+#endif /*  #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/TransformFunctions/arm_rfft_fast_init_f16.c
+++ b/Source/TransformFunctions/arm_rfft_fast_init_f16.c
@ -0,0 +1,355 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_rfft_fast_init_f16.c
+ * Description:  Initialization functions for the half-precision floating-point fast real FFT
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/transform_functions_f16.h"
+#include "arm_common_tables_f16.h"
+#include "arm_const_structs_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+/**
+  @ingroup groupTransforms
+ */
+
+/**
+  @addtogroup RealFFT
+  @{
+ */
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_16) && defined(ARM_TABLE_BITREVIDX_FLT_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_32))
+
+/**
+  @private
+  @brief         Set up an instance for the 32-point half-precision floating-point real FFT.
+  @param[in,out] S  points to an arm_rfft_fast_instance_f16 structure
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance is ready for use
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+                   - otherwise the status reported by arm_cfft_init_f16() for the
+                     embedded 16-point CFFT instance
+ */
+
+static arm_status arm_rfft_32_fast_init_f16(arm_rfft_fast_instance_f16 *S)
+{
+  arm_status cfftStatus;
+
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* The embedded complex FFT instance uses half the real length. */
+  cfftStatus = arm_cfft_init_f16(&(S->Sint), 16);
+  if (cfftStatus == ARM_MATH_SUCCESS)
+  {
+    /* RFFT fields are only written once the inner CFFT is initialised. */
+    S->fftLenRFFT   = 32U;
+    S->pTwiddleRFFT = (float16_t *) twiddleCoefF16_rfft_32;
+  }
+
+  return cfftStatus;
+}
+#endif 
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_64))
+
+/**
+  @private
+  @brief         Set up an instance for the 64-point half-precision floating-point real FFT.
+  @param[in,out] S  points to an arm_rfft_fast_instance_f16 structure
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance is ready for use
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+                   - otherwise the status reported by arm_cfft_init_f16() for the
+                     embedded 32-point CFFT instance
+ */
+
+static arm_status arm_rfft_64_fast_init_f16(arm_rfft_fast_instance_f16 *S)
+{
+  arm_status cfftStatus;
+
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* The embedded complex FFT instance uses half the real length. */
+  cfftStatus = arm_cfft_init_f16(&(S->Sint), 32);
+  if (cfftStatus == ARM_MATH_SUCCESS)
+  {
+    /* RFFT fields are only written once the inner CFFT is initialised. */
+    S->fftLenRFFT   = 64U;
+    S->pTwiddleRFFT = (float16_t *) twiddleCoefF16_rfft_64;
+  }
+
+  return cfftStatus;
+}
+#endif 
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_128))
+
+/**
+  @private
+  @brief         Set up an instance for the 128-point half-precision floating-point real FFT.
+  @param[in,out] S  points to an arm_rfft_fast_instance_f16 structure
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance is ready for use
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+                   - otherwise the status reported by arm_cfft_init_f16() for the
+                     embedded 64-point CFFT instance
+ */
+
+static arm_status arm_rfft_128_fast_init_f16(arm_rfft_fast_instance_f16 *S)
+{
+  arm_status cfftStatus;
+
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* The embedded complex FFT instance uses half the real length. */
+  cfftStatus = arm_cfft_init_f16(&(S->Sint), 64);
+  if (cfftStatus == ARM_MATH_SUCCESS)
+  {
+    /* RFFT fields are only written once the inner CFFT is initialised. */
+    S->fftLenRFFT   = 128U;
+    S->pTwiddleRFFT = (float16_t *) twiddleCoefF16_rfft_128;
+  }
+
+  return cfftStatus;
+}
+#endif 
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_256))
+
+/**
+  @private
+  @brief         Set up an instance for the 256-point half-precision floating-point real FFT.
+  @param[in,out] S  points to an arm_rfft_fast_instance_f16 structure
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance is ready for use
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+                   - otherwise the status reported by arm_cfft_init_f16() for the
+                     embedded 128-point CFFT instance
+ */
+
+static arm_status arm_rfft_256_fast_init_f16(arm_rfft_fast_instance_f16 *S)
+{
+  arm_status cfftStatus;
+
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* The embedded complex FFT instance uses half the real length. */
+  cfftStatus = arm_cfft_init_f16(&(S->Sint), 128);
+  if (cfftStatus == ARM_MATH_SUCCESS)
+  {
+    /* RFFT fields are only written once the inner CFFT is initialised. */
+    S->fftLenRFFT   = 256U;
+    S->pTwiddleRFFT = (float16_t *) twiddleCoefF16_rfft_256;
+  }
+
+  return cfftStatus;
+}
+#endif 
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_512))
+
+/**
+  @private
+  @brief         Set up an instance for the 512-point half-precision floating-point real FFT.
+  @param[in,out] S  points to an arm_rfft_fast_instance_f16 structure
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance is ready for use
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+                   - otherwise the status reported by arm_cfft_init_f16() for the
+                     embedded 256-point CFFT instance
+ */
+
+static arm_status arm_rfft_512_fast_init_f16(arm_rfft_fast_instance_f16 *S)
+{
+  arm_status cfftStatus;
+
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* The embedded complex FFT instance uses half the real length. */
+  cfftStatus = arm_cfft_init_f16(&(S->Sint), 256);
+  if (cfftStatus == ARM_MATH_SUCCESS)
+  {
+    /* RFFT fields are only written once the inner CFFT is initialised. */
+    S->fftLenRFFT   = 512U;
+    S->pTwiddleRFFT = (float16_t *) twiddleCoefF16_rfft_512;
+  }
+
+  return cfftStatus;
+}
+#endif 
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_1024))
+/**
+  @private
+  @brief         Set up an instance for the 1024-point half-precision floating-point real FFT.
+  @param[in,out] S  points to an arm_rfft_fast_instance_f16 structure
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance is ready for use
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+                   - otherwise the status reported by arm_cfft_init_f16() for the
+                     embedded 512-point CFFT instance
+ */
+
+static arm_status arm_rfft_1024_fast_init_f16(arm_rfft_fast_instance_f16 *S)
+{
+  arm_status cfftStatus;
+
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* The embedded complex FFT instance uses half the real length. */
+  cfftStatus = arm_cfft_init_f16(&(S->Sint), 512);
+  if (cfftStatus == ARM_MATH_SUCCESS)
+  {
+    /* RFFT fields are only written once the inner CFFT is initialised. */
+    S->fftLenRFFT   = 1024U;
+    S->pTwiddleRFFT = (float16_t *) twiddleCoefF16_rfft_1024;
+  }
+
+  return cfftStatus;
+}
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_2048))
+/**
+  @private
+  @brief         Set up an instance for the 2048-point half-precision floating-point real FFT.
+  @param[in,out] S  points to an arm_rfft_fast_instance_f16 structure
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance is ready for use
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+                   - otherwise the status reported by arm_cfft_init_f16() for the
+                     embedded 1024-point CFFT instance
+ */
+static arm_status arm_rfft_2048_fast_init_f16(arm_rfft_fast_instance_f16 *S)
+{
+  arm_status cfftStatus;
+
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* The embedded complex FFT instance uses half the real length. */
+  cfftStatus = arm_cfft_init_f16(&(S->Sint), 1024);
+  if (cfftStatus == ARM_MATH_SUCCESS)
+  {
+    /* RFFT fields are only written once the inner CFFT is initialised. */
+    S->fftLenRFFT   = 2048U;
+    S->pTwiddleRFFT = (float16_t *) twiddleCoefF16_rfft_2048;
+  }
+
+  return cfftStatus;
+}
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_4096))
+/**
+  @private
+  @brief         Set up an instance for the 4096-point half-precision floating-point real FFT.
+  @param[in,out] S  points to an arm_rfft_fast_instance_f16 structure
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance is ready for use
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+                   - otherwise the status reported by arm_cfft_init_f16() for the
+                     embedded 2048-point CFFT instance
+ */
+
+static arm_status arm_rfft_4096_fast_init_f16(arm_rfft_fast_instance_f16 *S)
+{
+  arm_status cfftStatus;
+
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* The embedded complex FFT instance uses half the real length. */
+  cfftStatus = arm_cfft_init_f16(&(S->Sint), 2048);
+  if (cfftStatus == ARM_MATH_SUCCESS)
+  {
+    /* RFFT fields are only written once the inner CFFT is initialised. */
+    S->fftLenRFFT   = 4096U;
+    S->pTwiddleRFFT = (float16_t *) twiddleCoefF16_rfft_4096;
+  }
+
+  return cfftStatus;
+}
+#endif 
+
+/**
+  @brief         Initialization function for the half-precision floating-point real FFT.
+  @param[in,out] S       points to an arm_rfft_fast_instance_f16 structure
+  @param[in]     fftLen  length of the Real Sequence
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : Operation successful
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+                                                    (or its tables were not compiled in), or
+                                                    <code>S</code> is a null pointer
+                   - otherwise the status reported by arm_cfft_init_f16() for the
+                     embedded CFFT instance
+
+  @par           Description
+                   The parameter <code>fftLen</code> specifies the length of RFFT/RIFFT process.
+                   Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096.
+  @par
+                   This function initializes the embedded CFFT instance (through
+                   arm_cfft_init_f16() with half of <code>fftLen</code>) and the RFFT
+                   twiddle factor table pointer.
+ */
+
+arm_status arm_rfft_fast_init_f16(
+  arm_rfft_fast_instance_f16 * S,
+  uint16_t fftLen)
+{
+  /* Dispatch directly to the length-specific initializer. Each case is only
+     compiled when the tables it needs are part of the build, so a length that
+     was configured out falls through to the argument error below. */
+  switch (fftLen)
+  {
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_4096))
+  case 4096U:
+    return arm_rfft_4096_fast_init_f16(S);
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_2048))
+  case 2048U:
+    return arm_rfft_2048_fast_init_f16(S);
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_1024))
+  case 1024U:
+    return arm_rfft_1024_fast_init_f16(S);
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_512))
+  case 512U:
+    return arm_rfft_512_fast_init_f16(S);
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_256))
+  case 256U:
+    return arm_rfft_256_fast_init_f16(S);
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_128))
+  case 128U:
+    return arm_rfft_128_fast_init_f16(S);
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_64))
+  case 64U:
+    return arm_rfft_64_fast_init_f16(S);
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_16) && defined(ARM_TABLE_BITREVIDX_FLT_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F16_32))
+  case 32U:
+    return arm_rfft_32_fast_init_f16(S);
+#endif
+  default:
+    break;
+  }
+
+  /* Unsupported length, or the tables for that length are not in this build. */
+  return ARM_MATH_ARGUMENT_ERROR;
+}
+
+/**
+  @} end of RealFFT group
+ */
+
+#endif /*  #if defined(ARM_FLOAT16_SUPPORTED) */
--- a/Source/fft.cmake
+++ b/Source/fft.cmake
@ -438,6 +438,59 @@ if (CONFIGTABLE AND RFFT_FAST_F32_4096)
    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096)
 endif()

+#######################################
+# RFFT FAST F16
+# Each RFFT_FAST_F16_<N> option enables the <N>/2-point F16 twiddle and bit-reversal
+# tables plus the <N>-point RFFT twiddle table, matching the guards in arm_rfft_fast_init_f16.c.
+
+if (CONFIGTABLE AND RFFT_FAST_F16_32)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_16)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_16)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_RFFT_F16_32)
+endif()
+
+if (CONFIGTABLE AND RFFT_FAST_F16_64)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_32)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_32)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_RFFT_F16_64)
+endif()
+
+if (CONFIGTABLE AND RFFT_FAST_F16_128)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_64)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_64)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_RFFT_F16_128)
+endif()
+
+if (CONFIGTABLE AND RFFT_FAST_F16_256)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_128)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_128)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_RFFT_F16_256)
+endif()
+
+if (CONFIGTABLE AND RFFT_FAST_F16_512)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_256)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_256)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_RFFT_F16_512)
+endif()
+
+if (CONFIGTABLE AND RFFT_FAST_F16_1024)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_512)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_512)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_RFFT_F16_1024)
+endif()
+
+if (CONFIGTABLE AND RFFT_FAST_F16_2048)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_1024)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_1024)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_RFFT_F16_2048)
+endif()
+
+if (CONFIGTABLE AND RFFT_FAST_F16_4096)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_F16_2048)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_BITREVIDX_FLT_2048)
+    target_compile_definitions(${PROJECT} PUBLIC ARM_TABLE_TWIDDLECOEF_RFFT_F16_4096)
+endif()
+
 #######################################
 #
 # RFFT F32
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@ -328,6 +328,7 @@ if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEO
 set(TESTSRC16 
  Source/Tests/BasicTestsF16.cpp
  Source/Tests/TransformCF16.cpp
+  Source/Tests/TransformRF16.cpp
  )
 endif()
 endif() 
--- a/Testing/Include/Tests/TransformRF16.h
+++ b/Testing/Include/Tests/TransformRF16.h
@ -0,0 +1,25 @@
+#include "Test.h"
+#include "Pattern.h"
+
+#include "dsp/transform_functions_f16.h"
+
+class TransformRF16:public Client::Suite // Test suite driving arm_rfft_fast_f16 in both directions
+    {
+        public:
+            TransformRF16(Testing::testID_t id);
+            virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr); // per-test pattern loading and instance init
+            virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr); // dumps the computed output
+        private:
+            #include "TransformRF16_decl.h" // presumably generated: test method and pattern/test ID declarations - TODO confirm
+            
+            Client::Pattern<float16_t> input; // samples loaded by setUp for the selected test
+            Client::LocalPattern<float16_t> outputfft; // transform result, sized from ref in setUp
+            Client::LocalPattern<float16_t> inputchanged; // scratch copy of input handed to arm_rfft_fast_f16
+
+            Client::RefPattern<float16_t> ref; // expected output compared with ASSERT_SNR
+
+            arm_rfft_fast_instance_f16 instRfftF16; // initialised in setUp by arm_rfft_fast_init_f16
+
+            int ifft; // passed as ifftFlag: 0 = RFFT, 1 = RIFFT
+            
+    };
--- a/Testing/Source/Tests/TransformRF16.cpp
+++ b/Testing/Source/Tests/TransformRF16.cpp
@ -0,0 +1,473 @@
+#include "TransformRF16.h"
+#include <stdio.h>
+#include "Error.h"
+#include "Test.h"
+
+
+#define SNR_THRESHOLD 58
+
+
+
+    void TransformRF16::test_rfft_f16()
+    {
+        // Buffers prepared by setUp: loaded input, scratch copy and output.
+        float16_t *source  = input.ptr();
+        float16_t *scratch = inputchanged.ptr();
+        float16_t *result  = outputfft.ptr();
+
+        // arm_rfft_fast_f16 is documented as modifying its source buffer,
+        // so the transform runs on a scratch copy of the loaded pattern.
+        memcpy(scratch, source, sizeof(float16_t) * input.nbSamples());
+
+        arm_rfft_fast_f16(&this->instRfftF16, scratch, result, this->ifft);
+
+        // Compare against the reference and check nothing was written past the end.
+        ASSERT_SNR(outputfft, ref, (float16_t)SNR_THRESHOLD);
+        ASSERT_EMPTY_TAIL(outputfft);
+    }
+
+  
+    void TransformRF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
+    {
+       // Per test id: load the input and reference patterns, initialise the RFFT instance,
+       // create the scratch copy of the input and select the direction (0 = RFFT, 1 = RIFFT).
+       (void)paramsArgs;
+
+       switch(id)
+       {
+          // NOISY SIGNALS: tests 1..8 are forward transforms, 17..24 the matching inverse ones.
+          case TransformRF16::TEST_RFFT_F16_1:
+
+            input.reload(TransformRF16::INPUTS_RFFT_NOISY_32_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_NOISY_32_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,32); // NOTE(review): the returned status is not checked here or in the other cases
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_17:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_NOISY_32_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_NOISY_32_F16_ID,mgr); // inverse tests compare against the forward test's input samples
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,32);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_2:
+
+            input.reload(TransformRF16::INPUTS_RFFT_NOISY_64_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_NOISY_64_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,64);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_18:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_NOISY_64_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_NOISY_64_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,64);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_3:
+
+            input.reload(TransformRF16::INPUTS_RFFT_NOISY_128_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_NOISY_128_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,128);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_19:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_NOISY_128_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_NOISY_128_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,128);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_4:
+
+            input.reload(TransformRF16::INPUTS_RFFT_NOISY_256_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_NOISY_256_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,256);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_20:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_NOISY_256_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_NOISY_256_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,256);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_5:
+
+            input.reload(TransformRF16::INPUTS_RFFT_NOISY_512_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_NOISY_512_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,512);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_21:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_NOISY_512_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_NOISY_512_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,512);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_6:
+
+            input.reload(TransformRF16::INPUTS_RFFT_NOISY_1024_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_NOISY_1024_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,1024);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_22:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_NOISY_1024_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_NOISY_1024_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,1024);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_7:
+
+            input.reload(TransformRF16::INPUTS_RFFT_NOISY_2048_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_NOISY_2048_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,2048);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_23:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_NOISY_2048_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_NOISY_2048_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,2048);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_8:
+
+            input.reload(TransformRF16::INPUTS_RFFT_NOISY_4096_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_NOISY_4096_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,4096);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_24:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_NOISY_4096_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_NOISY_4096_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,4096);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          /* STEP FUNCTIONS */
+          // Tests 9..16 are forward transforms, 25..32 the matching inverse ones.
+          case TransformRF16::TEST_RFFT_F16_9:
+
+            input.reload(TransformRF16::INPUTS_RFFT_STEP_32_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_STEP_32_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,32);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_25:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_STEP_32_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_STEP_32_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,32);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_10:
+
+            input.reload(TransformRF16::INPUTS_RFFT_STEP_64_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_STEP_64_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,64);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_26:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_STEP_64_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_STEP_64_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,64);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_11:
+
+            input.reload(TransformRF16::INPUTS_RFFT_STEP_128_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_STEP_128_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,128);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_27:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_STEP_128_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_STEP_128_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,128);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_12:
+
+            input.reload(TransformRF16::INPUTS_RFFT_STEP_256_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_STEP_256_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,256);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_28:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_STEP_256_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_STEP_256_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,256);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_13:
+
+            input.reload(TransformRF16::INPUTS_RFFT_STEP_512_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_STEP_512_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,512);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_29:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_STEP_512_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_STEP_512_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,512);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_14:
+
+            input.reload(TransformRF16::INPUTS_RFFT_STEP_1024_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_STEP_1024_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,1024);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_30:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_STEP_1024_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_STEP_1024_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,1024);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_15:
+
+            input.reload(TransformRF16::INPUTS_RFFT_STEP_2048_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_STEP_2048_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,2048);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_31:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_STEP_2048_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_STEP_2048_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,2048);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_16:
+
+            input.reload(TransformRF16::INPUTS_RFFT_STEP_4096_F16_ID,mgr);
+            ref.reload(  TransformRF16::REF_RFFT_STEP_4096_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,4096);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=0;
+
+          break;
+
+          case TransformRF16::TEST_RFFT_F16_32:
+
+            input.reload(TransformRF16::INPUTS_RIFFT_STEP_4096_F16_ID,mgr);
+            ref.reload(  TransformRF16::INPUTS_RFFT_STEP_4096_F16_ID,mgr);
+
+            arm_rfft_fast_init_f16(&this->instRfftF16 ,4096);
+
+            inputchanged.create(input.nbSamples(),TransformRF16::TEMP_F16_ID,mgr);
+
+            this->ifft=1;
+
+          break;
+
+          // NOTE(review): no default case; an id not listed above leaves the patterns and instance untouched.
+
+       }
+
+       // The output buffer is sized from the reference pattern loaded above.
+      outputfft.create(ref.nbSamples(),TransformRF16::OUTPUT_RFFT_F16_ID,mgr);
+       
+
+    }
+
+    void TransformRF16::tearDown(Testing::testID_t,Client::PatternMgr *mgr)
+    {
+        // The test id is not needed here; only the computed output is dumped
+        // through the pattern manager.
+        outputfft.dump(mgr);
+    }
--- a/Testing/desc_f16.txt
+++ b/Testing/desc_f16.txt
@ -200,6 +200,118 @@ group Root {
              }

            }
+
+           suite Transform Real F16 {
+              class = TransformRF16
+              folder = TransformF16
+
+              Pattern INPUTS_RFFT_NOISY_32_F16_ID : RealInputSamples_Noisy_32_2_f16.txt 
+              Pattern INPUTS_RIFFT_NOISY_32_F16_ID : RealInputIFFTSamples_Noisy_32_2_f16.txt 
+              Pattern REF_RFFT_NOISY_32_F16_ID : RealFFTSamples_Noisy_32_2_f16.txt 
+
+              Pattern INPUTS_RFFT_NOISY_64_F16_ID : RealInputSamples_Noisy_64_3_f16.txt
+              Pattern INPUTS_RIFFT_NOISY_64_F16_ID : RealInputIFFTSamples_Noisy_64_3_f16.txt 
+              Pattern REF_RFFT_NOISY_64_F16_ID : RealFFTSamples_Noisy_64_3_f16.txt 
+
+              Pattern INPUTS_RFFT_NOISY_128_F16_ID : RealInputSamples_Noisy_128_4_f16.txt 
+              Pattern INPUTS_RIFFT_NOISY_128_F16_ID : RealInputIFFTSamples_Noisy_128_4_f16.txt 
+              Pattern REF_RFFT_NOISY_128_F16_ID : RealFFTSamples_Noisy_128_4_f16.txt 
+
+              Pattern INPUTS_RFFT_NOISY_256_F16_ID : RealInputSamples_Noisy_256_5_f16.txt 
+              Pattern INPUTS_RIFFT_NOISY_256_F16_ID : RealInputIFFTSamples_Noisy_256_5_f16.txt 
+              Pattern REF_RFFT_NOISY_256_F16_ID : RealFFTSamples_Noisy_256_5_f16.txt 
+
+              Pattern INPUTS_RFFT_NOISY_512_F16_ID : RealInputSamples_Noisy_512_6_f16.txt 
+              Pattern INPUTS_RIFFT_NOISY_512_F16_ID : RealInputIFFTSamples_Noisy_512_6_f16.txt 
+              Pattern REF_RFFT_NOISY_512_F16_ID : RealFFTSamples_Noisy_512_6_f16.txt 
+
+              Pattern INPUTS_RFFT_NOISY_1024_F16_ID : RealInputSamples_Noisy_1024_7_f16.txt 
+              Pattern INPUTS_RIFFT_NOISY_1024_F16_ID : RealInputIFFTSamples_Noisy_1024_7_f16.txt 
+              Pattern REF_RFFT_NOISY_1024_F16_ID : RealFFTSamples_Noisy_1024_7_f16.txt 
+
+              Pattern INPUTS_RFFT_NOISY_2048_F16_ID : RealInputSamples_Noisy_2048_8_f16.txt 
+              Pattern INPUTS_RIFFT_NOISY_2048_F16_ID : RealInputIFFTSamples_Noisy_2048_8_f16.txt 
+              Pattern REF_RFFT_NOISY_2048_F16_ID : RealFFTSamples_Noisy_2048_8_f16.txt 
+
+              Pattern INPUTS_RFFT_NOISY_4096_F16_ID : RealInputSamples_Noisy_4096_9_f16.txt 
+              Pattern INPUTS_RIFFT_NOISY_4096_F16_ID : RealInputIFFTSamples_Noisy_4096_9_f16.txt 
+              Pattern REF_RFFT_NOISY_4096_F16_ID : RealFFTSamples_Noisy_4096_9_f16.txt 
+
+              Pattern INPUTS_RFFT_STEP_16_F16_ID : RealInputSamples_Step_16_10_f16.txt 
+              Pattern INPUTS_RIFFT_STEP_16_F16_ID : RealInputIFFTSamples_Step_16_10_f16.txt 
+              Pattern REF_RFFT_STEP_16_F16_ID : RealFFTSamples_Step_16_10_f16.txt 
+
+              Pattern INPUTS_RFFT_STEP_32_F16_ID : RealInputSamples_Step_32_11_f16.txt 
+              Pattern INPUTS_RIFFT_STEP_32_F16_ID : RealInputIFFTSamples_Step_32_11_f16.txt 
+              Pattern REF_RFFT_STEP_32_F16_ID : RealFFTSamples_Step_32_11_f16.txt 
+
+              Pattern INPUTS_RFFT_STEP_64_F16_ID : RealInputSamples_Step_64_12_f16.txt 
+              Pattern INPUTS_RIFFT_STEP_64_F16_ID : RealInputIFFTSamples_Step_64_12_f16.txt 
+              Pattern REF_RFFT_STEP_64_F16_ID : RealFFTSamples_Step_64_12_f16.txt 
+
+              Pattern INPUTS_RFFT_STEP_128_F16_ID : RealInputSamples_Step_128_13_f16.txt 
+              Pattern INPUTS_RIFFT_STEP_128_F16_ID : RealInputIFFTSamples_Step_128_13_f16.txt 
+              Pattern REF_RFFT_STEP_128_F16_ID : RealFFTSamples_Step_128_13_f16.txt 
+
+              Pattern INPUTS_RFFT_STEP_256_F16_ID : RealInputSamples_Step_256_14_f16.txt 
+              Pattern INPUTS_RIFFT_STEP_256_F16_ID : RealInputIFFTSamples_Step_256_14_f16.txt 
+              Pattern REF_RFFT_STEP_256_F16_ID : RealFFTSamples_Step_256_14_f16.txt 
+
+              Pattern INPUTS_RFFT_STEP_512_F16_ID : RealInputSamples_Step_512_15_f16.txt 
+              Pattern INPUTS_RIFFT_STEP_512_F16_ID : RealInputIFFTSamples_Step_512_15_f16.txt 
+              Pattern REF_RFFT_STEP_512_F16_ID : RealFFTSamples_Step_512_15_f16.txt 
+
+              Pattern INPUTS_RFFT_STEP_1024_F16_ID : RealInputSamples_Step_1024_16_f16.txt 
+              Pattern INPUTS_RIFFT_STEP_1024_F16_ID : RealInputIFFTSamples_Step_1024_16_f16.txt 
+              Pattern REF_RFFT_STEP_1024_F16_ID : RealFFTSamples_Step_1024_16_f16.txt 
+
+              Pattern INPUTS_RFFT_STEP_2048_F16_ID : RealInputSamples_Step_2048_17_f16.txt 
+              Pattern INPUTS_RIFFT_STEP_2048_F16_ID : RealInputIFFTSamples_Step_2048_17_f16.txt 
+              Pattern REF_RFFT_STEP_2048_F16_ID : RealFFTSamples_Step_2048_17_f16.txt 
+
+              Pattern INPUTS_RFFT_STEP_4096_F16_ID : RealInputSamples_Step_4096_18_f16.txt 
+              Pattern INPUTS_RIFFT_STEP_4096_F16_ID : RealInputIFFTSamples_Step_4096_18_f16.txt 
+              Pattern REF_RFFT_STEP_4096_F16_ID : RealFFTSamples_Step_4096_18_f16.txt 
+              
+              Output  OUTPUT_RFFT_F16_ID : RealFFTSamples
+              Output  TEMP_F16_ID : Temp
+
+              Functions {
+                 rfft_noisy_32_f16:test_rfft_f16
+                 rfft_noisy_64_f16:test_rfft_f16
+                 rfft_noisy_128_f16:test_rfft_f16
+                 rfft_noisy_256_f16:test_rfft_f16
+                 rfft_noisy_512_f16:test_rfft_f16
+                 rfft_noisy_1024_f16:test_rfft_f16
+                 rfft_noisy_2048_f16:test_rfft_f16
+                 rfft_noisy_4096_f16:test_rfft_f16
+                 rfft_step_32_f16:test_rfft_f16
+                 rfft_step_64_f16:test_rfft_f16
+                 rfft_step_128_f16:test_rfft_f16
+                 rfft_step_256_f16:test_rfft_f16
+                 rfft_step_512_f16:test_rfft_f16
+                 rfft_step_1024_f16:test_rfft_f16
+                 rfft_step_2048_f16:test_rfft_f16
+                 rfft_step_4096_f16:test_rfft_f16
+                 rifft_noisy_32_f16:test_rfft_f16
+                 rifft_noisy_64_f16:test_rfft_f16
+                 rifft_noisy_128_f16:test_rfft_f16
+                 rifft_noisy_256_f16:test_rfft_f16
+                 rifft_noisy_512_f16:test_rfft_f16
+                 rifft_noisy_1024_f16:test_rfft_f16
+                 rifft_noisy_2048_f16:test_rfft_f16
+                 rifft_noisy_4096_f16:test_rfft_f16
+                 rifft_step_32_f16:test_rfft_f16
+                 rifft_step_64_f16:test_rfft_f16
+                 rifft_step_128_f16:test_rfft_f16
+                 rifft_step_256_f16:test_rfft_f16
+                 rifft_step_512_f16:test_rfft_f16
+                 rifft_step_1024_f16:test_rfft_f16
+                 rifft_step_2048_f16:test_rfft_f16
+                 rifft_step_4096_f16:test_rfft_f16
+              }
+
+            }
        }
    }
 }