CMSIS-DSP: Added benchmarking code for Transform.

6 years ago · 799b7aa521
parent 5a6f1e2baa
commit 799b7aa521
20 changed files with 16361 additions and 15 deletions
--- a/Source/TransformFunctions/CMakeLists.txt
+++ b/Source/TransformFunctions/CMakeLists.txt
@ -110,7 +110,7 @@ target_sources(CMSISDSPTransform PRIVATE arm_cfft_q31.c)
 target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q31.c)
 endif()
-# For scipy or or wrappers
+# For scipy or wrappers or benchmarks
 if (WRAPPER)
 target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_f32.c)
 endif()
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@ -118,6 +118,9 @@ set(TESTSRC
  Source/Benchmarks/BinaryF32.cpp
  Source/Benchmarks/BinaryQ31.cpp
  Source/Benchmarks/BinaryQ15.cpp
  Source/Benchmarks/TransformF32.cpp
  Source/Benchmarks/TransformQ31.cpp
  Source/Benchmarks/TransformQ15.cpp
  Source/Benchmarks/FullyConnectedBench.cpp
  Source/Benchmarks/PoolingBench.cpp
  )
--- a/Testing/Include/Benchmarks/TransformF32.h
+++ b/Testing/Include/Benchmarks/TransformF32.h
@ -0,0 +1,32 @@
 #include "Test.h"
 #include "Pattern.h"
 // Benchmark suite for the float32 transform functions (CFFT, fast RFFT, DCT4,
 // radix-4 and radix-2 CFFT). setUp() prepares the buffers and instances that
 // the test_* benchmark bodies then use.
 class TransformF32:public Client::Suite
    {
        public:
            TransformF32(Testing::testID_t id);
            // Reads the benchmark parameters and prepares buffers/instances for the given test ID.
            void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr);
            void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
        private:
            // NOTE(review): presumably declares the test_* methods and the pattern/test IDs
            // used by the implementation file - confirm against the generated header.
            #include "TransformF32_decl.h"
            // Input samples, reloaded from a pattern in setUp().
            Client::Pattern<float32_t> samples;
            // Output buffer, created in setUp().
            Client::LocalPattern<float32_t> output;
            // State buffer, only created for the DCT4 benchmark.
            Client::LocalPattern<float32_t> state;
            // Benchmark parameters, read from params in setUp() in this order.
            int nbSamples;
            int ifft;
            int bitRev;
            // Raw pointers into samples / output / state, set in setUp().
            float32_t *pSrc;
            float32_t *pDst;
            float32_t *pState;
            // Constant CFFT instance selected from the sample count; may be NULL for an unknown length.
            const arm_cfft_instance_f32 *cfftInstance;
            // Transform instances initialised in setUp() for the matching benchmark.
            arm_rfft_fast_instance_f32 rfftFastInstance;
            arm_dct4_instance_f32 dct4Instance;
            arm_rfft_instance_f32 rfftInstance;
            arm_cfft_radix4_instance_f32 cfftRadix4Instance;
            arm_cfft_radix2_instance_f32 cfftRadix2Instance;
    };
--- a/Testing/Include/Benchmarks/TransformQ15.h
+++ b/Testing/Include/Benchmarks/TransformQ15.h
@ -0,0 +1,31 @@
 #include "Test.h"
 #include "Pattern.h"
 // Benchmark suite for the Q15 transform functions (CFFT, RFFT, DCT4,
 // radix-4 and radix-2 CFFT). setUp() prepares the buffers and instances that
 // the test_* benchmark bodies then use.
 class TransformQ15:public Client::Suite
    {
        public:
            TransformQ15(Testing::testID_t id);
            // Reads the benchmark parameters and prepares buffers/instances for the given test ID.
            void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr);
            void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
        private:
            // NOTE(review): presumably declares the test_* methods and the pattern/test IDs
            // used by the implementation file - confirm against the generated header.
            #include "TransformQ15_decl.h"
            // Input samples, reloaded from a pattern in setUp().
            Client::Pattern<q15_t> samples;
            // Output buffer, created in setUp().
            Client::LocalPattern<q15_t> output;
            // State buffer, only created for the DCT4 benchmark.
            Client::LocalPattern<q15_t> state;
            // Benchmark parameters, read from params in setUp() in this order.
            int nbSamples;
            int ifft;
            int bitRev;
            // Raw pointers into samples / output / state, set in setUp().
            q15_t *pSrc;
            q15_t *pDst;
            q15_t *pState;
            // CFFT instance filled in by arm_cfft_init_q15() in setUp().
            arm_cfft_instance_q15 cfftInstance;
            // Transform instances initialised in setUp() for the matching benchmark.
            arm_dct4_instance_q15 dct4Instance;
            arm_rfft_instance_q15 rfftInstance;
            arm_cfft_radix4_instance_q15 cfftRadix4Instance;
            arm_cfft_radix2_instance_q15 cfftRadix2Instance;
    };
--- a/Testing/Include/Benchmarks/TransformQ31.h
+++ b/Testing/Include/Benchmarks/TransformQ31.h
@ -0,0 +1,31 @@
 #include "Test.h"
 #include "Pattern.h"
 // Benchmark suite for the Q31 transform functions (CFFT, RFFT, DCT4,
 // radix-4 and radix-2 CFFT). setUp() prepares the buffers and instances that
 // the test_* benchmark bodies then use.
 class TransformQ31:public Client::Suite
    {
        public:
            TransformQ31(Testing::testID_t id);
            // Reads the benchmark parameters and prepares buffers/instances for the given test ID.
            void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr);
            void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
        private:
            // NOTE(review): presumably declares the test_* methods and the pattern/test IDs
            // used by the implementation file - confirm against the generated header.
            #include "TransformQ31_decl.h"
            // Input samples, reloaded from a pattern in setUp().
            Client::Pattern<q31_t> samples;
            // Output buffer, created in setUp().
            Client::LocalPattern<q31_t> output;
            // State buffer, only created for the DCT4 benchmark.
            Client::LocalPattern<q31_t> state;
            // Benchmark parameters, read from params in setUp() in this order.
            int nbSamples;
            int ifft;
            int bitRev;
            // Raw pointers into samples / output / state, set in setUp().
            q31_t *pSrc;
            q31_t *pDst;
            q31_t *pState;
            // CFFT instance filled in by arm_cfft_init_q31() in setUp().
            arm_cfft_instance_q31 cfftInstance;
            // Transform instances initialised in setUp() for the matching benchmark.
            arm_dct4_instance_q31 dct4Instance;
            arm_rfft_instance_q31 rfftInstance;
            arm_cfft_radix4_instance_q31 cfftRadix4Instance;
            arm_cfft_radix2_instance_q31 cfftRadix2Instance;
    };
--- a/Testing/PatternGeneration/Transform.py
+++ b/Testing/PatternGeneration/Transform.py
@ -0,0 +1,51 @@
 import os.path
 import numpy as np
 import itertools
 import Tools
 # These patterns are used for both tests and benchmarks.
 # For tests, dedicated saturation cases still need to be added.
 def randComplex(nb):
    """Return ``nb`` random complex128 samples with components in [-1, 1].

    ``2*nb`` normally distributed values are drawn, scaled so that the largest
    magnitude is exactly 1, and reinterpreted pairwise as (real, imag).

    The scaling uses the largest *absolute* value: dividing by the plain
    maximum would leave values below -1 whenever the most negative draw is
    larger in magnitude than the most positive one.
    """
    data = np.random.randn(2*nb)
    # An empty draw (nb == 0) has no peak; an all-zero draw must not divide by 0.
    peak = np.max(np.abs(data)) if data.size else 0.0
    if peak > 0:
        data = data / peak
    return data.view(dtype=np.complex128)
 def asReal(a):
    """Flatten ``a`` to one dimension and reinterpret its buffer as float64 values."""
    flat = a.reshape(np.size(a))
    return flat.view(dtype=np.float64)
 def writeTests(config):
    """Write one real and one complex random input pattern through ``config``.

    The real pattern holds 2048 non-negative samples and the complex pattern
    256 samples (written as interleaved real values). Both are written with
    pattern number 1 under the names "RealSamples" and "ComplexSamples".
    """
    NBRSAMPLES=2048
    NBCSAMPLES=256

    # Take magnitudes first, then scale by the largest one, so every sample
    # lies in [0, 1]. Scaling by the signed maximum before taking the absolute
    # value could produce samples above 1.
    samples = np.abs(np.random.randn(NBRSAMPLES))
    samples = samples/np.max(samples)

    samplesC=randComplex(NBCSAMPLES)

    config.writeInput(1, samples,"RealSamples")
    config.writeInput(1, asReal(samplesC),"ComplexSamples")
 # Folders (relative to the working directory) handed to Tools.Config for the
 # generated patterns and parameters.
 PATTERNDIR = os.path.join("Patterns", "DSP", "Transform", "Transform")
 PARAMDIR = os.path.join("Parameters", "DSP", "Transform", "Transform")

 # One configuration per data format, created in the order f32, q31, q15.
 configf32, configq31, configq15 = [
    Tools.Config(PATTERNDIR, PARAMDIR, fmt) for fmt in ("f32", "q31", "q15")
 ]

 # Generate the patterns for every format, in the same order.
 for cfg in (configf32, configq31, configq15):
    writeTests(cfg)
--- a/Testing/Patterns/DSP/Transform/TransformF32/ComplexSamples1_f32.txt
+++ b/Testing/Patterns/DSP/Transform/TransformF32/ComplexSamples1_f32.txt
--- a/Testing/Patterns/DSP/Transform/TransformF32/RealSamples1_f32.txt
+++ b/Testing/Patterns/DSP/Transform/TransformF32/RealSamples1_f32.txt
--- a/Testing/Patterns/DSP/Transform/TransformQ15/ComplexSamples1_q15.txt
+++ b/Testing/Patterns/DSP/Transform/TransformQ15/ComplexSamples1_q15.txt
--- a/Testing/Patterns/DSP/Transform/TransformQ15/RealSamples1_q15.txt
+++ b/Testing/Patterns/DSP/Transform/TransformQ15/RealSamples1_q15.txt
--- a/Testing/Patterns/DSP/Transform/TransformQ31/ComplexSamples1_q31.txt
+++ b/Testing/Patterns/DSP/Transform/TransformQ31/ComplexSamples1_q31.txt
--- a/Testing/Patterns/DSP/Transform/TransformQ31/RealSamples1_q31.txt
+++ b/Testing/Patterns/DSP/Transform/TransformQ31/RealSamples1_q31.txt
--- a/Testing/Source/Benchmarks/TransformF32.cpp
+++ b/Testing/Source/Benchmarks/TransformF32.cpp
@ -0,0 +1,166 @@
 #include "TransformF32.h"
 #include "Error.h"
 #include "arm_const_structs.h"
 /*
  * Select the constant float32 CFFT instance matching a transform length.
  *
  * fftLen : number of complex samples; lengths 16, 32, 64, 128, 256, 512,
  *          1024, 2048 and 4096 are recognised.
  * Returns the address of the matching arm_cfft_sR_f32_len* structure, or
  * NULL when the length is not one of the recognised values.
  */
 const arm_cfft_instance_f32 *arm_cfft_get_instance_f32(uint16_t fftLen)
 {
   const arm_cfft_instance_f32 *instance = NULL;

   switch (fftLen) {
     case 16:   instance = &arm_cfft_sR_f32_len16;   break;
     case 32:   instance = &arm_cfft_sR_f32_len32;   break;
     case 64:   instance = &arm_cfft_sR_f32_len64;   break;
     case 128:  instance = &arm_cfft_sR_f32_len128;  break;
     case 256:  instance = &arm_cfft_sR_f32_len256;  break;
     case 512:  instance = &arm_cfft_sR_f32_len512;  break;
     case 1024: instance = &arm_cfft_sR_f32_len1024; break;
     case 2048: instance = &arm_cfft_sR_f32_len2048; break;
     case 4096: instance = &arm_cfft_sR_f32_len4096; break;
     default:   break;
   }

   return(instance);
 }
    void TransformF32::test_cfft_f32()
    { 
       arm_cfft_f32(this->cfftInstance, this->pDst, this->ifft,this->bitRev);
    } 
    void TransformF32::test_rfft_f32()
    { 
       arm_rfft_fast_f32(&this->rfftFastInstance, this->pSrc, this->pDst, this->ifft);
    } 
    void TransformF32::test_dct4_f32()
    { 
        arm_dct4_f32(
          &this->dct4Instance,
          this->pState,
          this->pDst);
    } 
    void TransformF32::test_cfft_radix4_f32()
    { 
       arm_cfft_radix4_f32(&this->cfftRadix4Instance,this->pDst);
    } 
    void TransformF32::test_cfft_radix2_f32()
    { 
       arm_cfft_radix2_f32(&this->cfftRadix2Instance,this->pDst);
    } 
    /*
     * Prepare the buffers and the transform instance needed by benchmark `id`.
     *
     * params is read in order: number of samples, inverse-transform flag,
     * bit-reversal flag. Input patterns are reloaded and output/state buffers
     * are created through mgr. Benchmarks that work on a single buffer get the
     * input samples copied into the output buffer here.
     */
    void TransformF32::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
    {
       std::vector<Testing::param_t>::iterator cursor = params.begin();

       nbSamples = *cursor++;
       ifft = *cursor++;
       bitRev = *cursor;

       switch(id)
       {
          case TEST_CFFT_F32_1:
          {
            /* Complex data: two floats per sample. */
            samples.reload(TransformF32::INPUTC_F32_ID,mgr,2*nbSamples);
            output.create(2*nbSamples,TransformF32::OUT_F32_ID,mgr);

            pSrc=samples.ptr();
            pDst=output.ptr();

            cfftInstance=arm_cfft_get_instance_f32(nbSamples);
            memcpy(pDst,pSrc,2*sizeof(float32_t)*nbSamples);
            break;
          }

          case TEST_RFFT_F32_2:
          {
            samples.reload(TransformF32::INPUTR_F32_ID,mgr,nbSamples);
            output.create(nbSamples,TransformF32::OUT_F32_ID,mgr);

            pSrc=samples.ptr();
            pDst=output.ptr();

            arm_rfft_fast_init_f32(&rfftFastInstance, nbSamples);
            break;
          }

          case TEST_DCT4_F32_3:
          {
            samples.reload(TransformF32::INPUTR_F32_ID,mgr,nbSamples);
            output.create(nbSamples,TransformF32::OUT_F32_ID,mgr);
            state.create(2*nbSamples,TransformF32::STATE_F32_ID,mgr);

            pSrc=samples.ptr();
            pDst=output.ptr();
            pState=state.ptr();

            /* Normalisation factor sqrt(2/N) handed to the DCT4 initialisation. */
            const float32_t normalize = sqrt((2.0f/(float32_t)nbSamples));

            memcpy(pDst,pSrc,sizeof(float32_t)*nbSamples);

            arm_dct4_init_f32(&dct4Instance,
                              &rfftInstance,
                              &cfftRadix4Instance,
                              nbSamples,
                              nbSamples/2,
                              normalize);
            break;
          }

          case TEST_CFFT_RADIX4_F32_4:
          {
            samples.reload(TransformF32::INPUTC_F32_ID,mgr,2*nbSamples);
            output.create(2*nbSamples,TransformF32::OUT_F32_ID,mgr);

            pSrc=samples.ptr();
            pDst=output.ptr();

            memcpy(pDst,pSrc,2*sizeof(float32_t)*nbSamples);

            arm_cfft_radix4_init_f32(&cfftRadix4Instance, nbSamples, ifft, bitRev);
            break;
          }

          case TEST_CFFT_RADIX2_F32_5:
          {
            samples.reload(TransformF32::INPUTC_F32_ID,mgr,2*nbSamples);
            output.create(2*nbSamples,TransformF32::OUT_F32_ID,mgr);

            pSrc=samples.ptr();
            pDst=output.ptr();

            memcpy(pDst,pSrc,2*sizeof(float32_t)*nbSamples);

            arm_cfft_radix2_init_f32(&cfftRadix2Instance, nbSamples, ifft, bitRev);
            break;
          }
       }
    }
    /* Per-test cleanup hook; this suite performs no work here. */
    void TransformF32::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
    {
       (void)id;
       (void)mgr;
    }
--- a/Testing/Source/Benchmarks/TransformQ15.cpp
+++ b/Testing/Source/Benchmarks/TransformQ15.cpp
@ -0,0 +1,224 @@
 #include "TransformQ15.h"
 #include "Error.h"
 #include "arm_const_structs.h"
 /*
  * CFFT_INIT(typ, suffix, pInst_, len_)
  *
  * Expands to the complete body of a CFFT instance initialisation function:
  * it stores the FFT length in the instance, selects the bit-reversal table,
  * its length and the twiddle table that match the length, and returns an
  * arm_status.
  *
  *   typ     element type of the twiddle table (e.g. q15_t)
  *   suffix  suffix of the twiddleCoef_<len>_<suffix> tables (e.g. q15)
  *   pInst_  pointer to the instance to fill (evaluated several times)
  *   len_    requested FFT length
  *
  * Supported lengths are the powers of two from 16 to 4096. Any other length
  * yields ARM_MATH_ARGUMENT_ERROR and leaves the bit-reversal fields untouched.
  *
  * The macro parameters deliberately do not share a name with any structure
  * member: a parameter called fftLen would also be substituted inside the
  * member access "->fftLen" and break every caller whose argument is not
  * literally named fftLen.
  */
 #define CFFT_INIT(typ, suffix, pInst_, len_)                                    \
 {                                                                               \
        /*  Initialise the default arm status */                                \
        arm_status status = ARM_MATH_SUCCESS;                                   \
                                                                                \
        /*  Initialise the FFT length */                                        \
        (pInst_)->fftLen = (len_);                                              \
                                                                                \
        /*  Initialise the Twiddle coefficient pointer */                       \
        (pInst_)->pTwiddle = (typ *)twiddleCoef_4096_##suffix;                  \
                                                                                \
        /*  Initializations of Instance structure depending on the FFT length */\
        switch ((pInst_)->fftLen) {                                             \
                                                                                \
        /*  4096 point FFT: previously missing, so the length was rejected */   \
        case 4096U:                                                             \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_4096; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_4096_##suffix;              \
            break;                                                              \
                                                                                \
        case 2048U:                                                             \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_2048; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_2048_##suffix;              \
            break;                                                              \
                                                                                \
        case 1024U:                                                             \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_1024; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_1024_##suffix;              \
            break;                                                              \
                                                                                \
        case 512U:                                                              \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_512; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_512_##suffix;               \
            break;                                                              \
                                                                                \
        case 256U:                                                              \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_256; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_256_##suffix;               \
            break;                                                              \
                                                                                \
        case 128U:                                                              \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_128; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_128_##suffix;               \
            break;                                                              \
                                                                                \
        case 64U:                                                               \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_64;  \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_64_##suffix;                \
            break;                                                              \
                                                                                \
        case 32U:                                                               \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_32;  \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_32_##suffix;                \
            break;                                                              \
                                                                                \
        case 16U:                                                               \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_16;  \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_16_##suffix;                \
            break;                                                              \
                                                                                \
        default:                                                                \
            /*  Reporting argument error if fftSize is not valid value */       \
            status = ARM_MATH_ARGUMENT_ERROR;                                   \
            break;                                                              \
        }                                                                       \
                                                                                \
        return (status);                                                        \
 }
 /*
  * Initialise a Q15 CFFT instance for the given FFT length.
  * The whole body, including the returned status, comes from CFFT_INIT.
  */
 arm_status arm_cfft_init_q15(arm_cfft_instance_q15 *S, uint16_t fftLen)
 {
    CFFT_INIT(q15_t, q15, S, fftLen)
 }
    void TransformQ15::test_cfft_q15()
    { 
       arm_cfft_q15(&this->cfftInstance, this->pDst, this->ifft,this->bitRev);
    } 
    void TransformQ15::test_rfft_q15()
    { 
       arm_rfft_q15(&this->rfftInstance, this->pSrc, this->pDst);
    } 
    void TransformQ15::test_dct4_q15()
    { 
        arm_dct4_q15(
          &this->dct4Instance,
          this->pState,
          this->pDst);
    } 
    void TransformQ15::test_cfft_radix4_q15()
    { 
       arm_cfft_radix4_q15(&this->cfftRadix4Instance,this->pDst);
    } 
    void TransformQ15::test_cfft_radix2_q15()
    { 
       arm_cfft_radix2_q15(&this->cfftRadix2Instance,this->pDst);
    } 
    /*
     * Prepare the buffers and the transform instance needed by benchmark `id`.
     *
     * params is read in order: number of samples, inverse-transform flag,
     * bit-reversal flag. Input patterns are reloaded and output/state buffers
     * are created through mgr. Benchmarks that work on a single buffer get the
     * input samples copied into the output buffer here.
     */
    void TransformQ15::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
    {
       q15_t normalize;

       std::vector<Testing::param_t>::iterator it = params.begin();
       this->nbSamples = *it++;
       this->ifft = *it++;
       this->bitRev = *it;

       switch(id)
       {
          case TEST_CFFT_Q15_1:
            /* Complex data: two values per sample. */
            samples.reload(TransformQ15::INPUTC_Q15_ID,mgr,2*this->nbSamples);
            output.create(2*this->nbSamples,TransformQ15::OUT_Q15_ID,mgr);

            this->pSrc=samples.ptr();
            this->pDst=output.ptr();

            arm_cfft_init_q15(&this->cfftInstance,this->nbSamples);
            memcpy(this->pDst,this->pSrc,2*sizeof(q15_t)*this->nbSamples);
          break;

          case TEST_RFFT_Q15_2:
            samples.reload(TransformQ15::INPUTR_Q15_ID,mgr,this->nbSamples);
            /* The forward real FFT writes the full complex spectrum, i.e.
               2*nbSamples values; a buffer of nbSamples would be overrun. */
            output.create(2*this->nbSamples,TransformQ15::OUT_Q15_ID,mgr);

            this->pSrc=samples.ptr();
            this->pDst=output.ptr();

            arm_rfft_init_q15(&this->rfftInstance, this->nbSamples, this->ifft, this->bitRev);
          break;

          case TEST_DCT4_Q15_3:
            samples.reload(TransformQ15::INPUTR_Q15_ID,mgr,this->nbSamples);
            output.create(this->nbSamples,TransformQ15::OUT_Q15_ID,mgr);
            state.create(2*this->nbSamples,TransformQ15::STATE_Q15_ID,mgr);

            this->pSrc=samples.ptr();
            this->pDst=output.ptr();
            this->pState=state.ptr();

            /* arm_dct4_init_q15 expects sqrt(2/N) in 1.15 fixed point. Passing
               the float value directly would truncate it to 0. */
            normalize = (q15_t)(sqrt((2.0f/(float32_t)this->nbSamples)) * 32768.0);

            memcpy(this->pDst,this->pSrc,sizeof(q15_t)*this->nbSamples);

            arm_dct4_init_q15(
               &this->dct4Instance,
               &this->rfftInstance,
               &this->cfftRadix4Instance,
               this->nbSamples,
               this->nbSamples/2,
               normalize);
          break;

          case TEST_CFFT_RADIX4_Q15_4:
            samples.reload(TransformQ15::INPUTC_Q15_ID,mgr,2*this->nbSamples);
            output.create(2*this->nbSamples,TransformQ15::OUT_Q15_ID,mgr);

            this->pSrc=samples.ptr();
            this->pDst=output.ptr();

            memcpy(this->pDst,this->pSrc,2*sizeof(q15_t)*this->nbSamples);

            arm_cfft_radix4_init_q15(&this->cfftRadix4Instance,
                this->nbSamples,
                this->ifft,
                this->bitRev);
          break;

          case TEST_CFFT_RADIX2_Q15_5:
            samples.reload(TransformQ15::INPUTC_Q15_ID,mgr,2*this->nbSamples);
            output.create(2*this->nbSamples,TransformQ15::OUT_Q15_ID,mgr);

            this->pSrc=samples.ptr();
            this->pDst=output.ptr();

            memcpy(this->pDst,this->pSrc,2*sizeof(q15_t)*this->nbSamples);

            arm_cfft_radix2_init_q15(&this->cfftRadix2Instance,
                this->nbSamples,
                this->ifft,
                this->bitRev);
          break;
       }
    }
    /* Per-test cleanup hook; this suite performs no work here. */
    void TransformQ15::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
    {
       (void)id;
       (void)mgr;
    }
--- a/Testing/Source/Benchmarks/TransformQ31.cpp
+++ b/Testing/Source/Benchmarks/TransformQ31.cpp
@ -0,0 +1,223 @@
 #include "TransformQ31.h"
 #include "Error.h"
 #include "arm_const_structs.h"
 /*
  * CFFT_INIT(typ, suffix, pInst_, len_)
  *
  * Expands to the complete body of a CFFT instance initialisation function:
  * it stores the FFT length in the instance, selects the bit-reversal table,
  * its length and the twiddle table that match the length, and returns an
  * arm_status.
  *
  *   typ     element type of the twiddle table (e.g. q31_t)
  *   suffix  suffix of the twiddleCoef_<len>_<suffix> tables (e.g. q31)
  *   pInst_  pointer to the instance to fill (evaluated several times)
  *   len_    requested FFT length
  *
  * Supported lengths are the powers of two from 16 to 4096. Any other length
  * yields ARM_MATH_ARGUMENT_ERROR and leaves the bit-reversal fields untouched.
  *
  * The macro parameters deliberately do not share a name with any structure
  * member: a parameter called fftLen would also be substituted inside the
  * member access "->fftLen" and break every caller whose argument is not
  * literally named fftLen.
  */
 #define CFFT_INIT(typ, suffix, pInst_, len_)                                    \
 {                                                                               \
        /*  Initialise the default arm status */                                \
        arm_status status = ARM_MATH_SUCCESS;                                   \
                                                                                \
        /*  Initialise the FFT length */                                        \
        (pInst_)->fftLen = (len_);                                              \
                                                                                \
        /*  Initialise the Twiddle coefficient pointer */                       \
        (pInst_)->pTwiddle = (typ *)twiddleCoef_4096_##suffix;                  \
                                                                                \
        /*  Initializations of Instance structure depending on the FFT length */\
        switch ((pInst_)->fftLen) {                                             \
                                                                                \
        /*  4096 point FFT: previously missing, so the length was rejected */   \
        case 4096U:                                                             \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_4096; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_4096_##suffix;              \
            break;                                                              \
                                                                                \
        case 2048U:                                                             \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_2048; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_2048_##suffix;              \
            break;                                                              \
                                                                                \
        case 1024U:                                                             \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_1024; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_1024_##suffix;              \
            break;                                                              \
                                                                                \
        case 512U:                                                              \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_512; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_512_##suffix;               \
            break;                                                              \
                                                                                \
        case 256U:                                                              \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_256; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_256_##suffix;               \
            break;                                                              \
                                                                                \
        case 128U:                                                              \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_128; \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_128_##suffix;               \
            break;                                                              \
                                                                                \
        case 64U:                                                               \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_64;  \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_64_##suffix;                \
            break;                                                              \
                                                                                \
        case 32U:                                                               \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_32;  \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_32_##suffix;                \
            break;                                                              \
                                                                                \
        case 16U:                                                               \
            (pInst_)->bitRevLength = ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH; \
            (pInst_)->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_16;  \
            (pInst_)->pTwiddle = (typ *)twiddleCoef_16_##suffix;                \
            break;                                                              \
                                                                                \
        default:                                                                \
            /*  Reporting argument error if fftSize is not valid value */       \
            status = ARM_MATH_ARGUMENT_ERROR;                                   \
            break;                                                              \
        }                                                                       \
                                                                                \
        return (status);                                                        \
 }
 /*
  * Initialise a Q31 CFFT instance for the given FFT length.
  * The whole body, including the returned status, comes from CFFT_INIT.
  */
 arm_status arm_cfft_init_q31(arm_cfft_instance_q31 *S, uint16_t fftLen)
 {
    CFFT_INIT(q31_t, q31, S, fftLen)
 }
    void TransformQ31::test_cfft_q31()
    { 
       arm_cfft_q31(&this->cfftInstance, this->pDst, this->ifft,this->bitRev);
    } 
    void TransformQ31::test_rfft_q31()
    { 
       arm_rfft_q31(&this->rfftInstance, this->pSrc, this->pDst);
    } 
    void TransformQ31::test_dct4_q31()
    { 
        arm_dct4_q31(
          &this->dct4Instance,
          this->pState,
          this->pDst);
    } 
    void TransformQ31::test_cfft_radix4_q31()
    { 
       arm_cfft_radix4_q31(&this->cfftRadix4Instance,this->pDst);
    } 
    void TransformQ31::test_cfft_radix2_q31()
    { 
       arm_cfft_radix2_q31(&this->cfftRadix2Instance,this->pDst);
    } 
    /*
     * Prepare the buffers and the transform instance needed by benchmark `id`.
     *
     * params is read in order: number of samples, inverse-transform flag,
     * bit-reversal flag. Input patterns are reloaded and output/state buffers
     * are created through mgr. Benchmarks that work on a single buffer get the
     * input samples copied into the output buffer here.
     */
    void TransformQ31::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
    {
       q31_t normalize;

       std::vector<Testing::param_t>::iterator it = params.begin();
       this->nbSamples = *it++;
       this->ifft = *it++;
       this->bitRev = *it;

       switch(id)
       {
          case TEST_CFFT_Q31_1:
            /* Complex data: two values per sample. */
            samples.reload(TransformQ31::INPUTC_Q31_ID,mgr,2*this->nbSamples);
            output.create(2*this->nbSamples,TransformQ31::OUT_Q31_ID,mgr);

            this->pSrc=samples.ptr();
            this->pDst=output.ptr();

            arm_cfft_init_q31(&this->cfftInstance,this->nbSamples);
            memcpy(this->pDst,this->pSrc,2*sizeof(q31_t)*this->nbSamples);
          break;

          case TEST_RFFT_Q31_2:
            samples.reload(TransformQ31::INPUTR_Q31_ID,mgr,this->nbSamples);
            /* The forward real FFT writes the full complex spectrum, i.e.
               2*nbSamples values; a buffer of nbSamples would be overrun. */
            output.create(2*this->nbSamples,TransformQ31::OUT_Q31_ID,mgr);

            this->pSrc=samples.ptr();
            this->pDst=output.ptr();

            arm_rfft_init_q31(&this->rfftInstance, this->nbSamples,this->ifft,this->bitRev);
          break;

          case TEST_DCT4_Q31_3:
            samples.reload(TransformQ31::INPUTR_Q31_ID,mgr,this->nbSamples);
            output.create(this->nbSamples,TransformQ31::OUT_Q31_ID,mgr);
            state.create(2*this->nbSamples,TransformQ31::STATE_Q31_ID,mgr);

            this->pSrc=samples.ptr();
            this->pDst=output.ptr();
            this->pState=state.ptr();

            /* arm_dct4_init_q31 expects sqrt(2/N) in 1.31 fixed point. Passing
               the float value directly would truncate it to 0. */
            normalize = (q31_t)(sqrt((2.0f/(float32_t)this->nbSamples)) * 2147483648.0);

            memcpy(this->pDst,this->pSrc,sizeof(q31_t)*this->nbSamples);

            arm_dct4_init_q31(
               &this->dct4Instance,
               &this->rfftInstance,
               &this->cfftRadix4Instance,
               this->nbSamples,
               this->nbSamples/2,
               normalize);
          break;

          case TEST_CFFT_RADIX4_Q31_4:
            samples.reload(TransformQ31::INPUTC_Q31_ID,mgr,2*this->nbSamples);
            output.create(2*this->nbSamples,TransformQ31::OUT_Q31_ID,mgr);

            this->pSrc=samples.ptr();
            this->pDst=output.ptr();

            memcpy(this->pDst,this->pSrc,2*sizeof(q31_t)*this->nbSamples);

            arm_cfft_radix4_init_q31(&this->cfftRadix4Instance,
                this->nbSamples,
                this->ifft,
                this->bitRev);
          break;

          case TEST_CFFT_RADIX2_Q31_5:
            samples.reload(TransformQ31::INPUTC_Q31_ID,mgr,2*this->nbSamples);
            output.create(2*this->nbSamples,TransformQ31::OUT_Q31_ID,mgr);

            this->pSrc=samples.ptr();
            this->pDst=output.ptr();

            memcpy(this->pDst,this->pSrc,2*sizeof(q31_t)*this->nbSamples);

            arm_cfft_radix2_init_q31(&this->cfftRadix2Instance,
                this->nbSamples,
                this->ifft,
                this->bitRev);
          break;
       }
    }
    /* Per-test cleanup hook; this suite performs no work here. */
    void TransformQ31::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
    {
       (void)id;
       (void)mgr;
    }
--- a/Testing/addAllBenchToDatabase.bat
+++ b/Testing/addAllBenchToDatabase.bat
@ -22,3 +22,5 @@ echo "Unary Matrix"
 python addToDB.py -f bench.txt  Unary 
 echo "Binary Matrix"
 python addToDB.py -f bench.txt  Binary
 echo "Transform"
 python addToDB.py -f bench.txt  Transform
--- a/Testing/addAllBenchToRegressionDatabase.bat
+++ b/Testing/addAllBenchToRegressionDatabase.bat
@ -22,3 +22,5 @@ echo "Unary Matrix"
 python addToRegDB.py -f bench.txt  Unary 
 echo "Binary Matrix"
 python addToRegDB.py -f bench.txt  Binary
 echo "Transform"
 python addToRegDB.py -f bench.txt  Transform
--- a/Testing/bench.txt
+++ b/Testing/bench.txt
@ -1270,6 +1270,143 @@ group Root {
           }
        }
        group Transform {
           class = Transform 
           folder = Transform 
           // Benchmark suite for the F32 transform functions.
           // It is bound to the class TransformF32 and the folder TransformF32.
           suite Transform F32 {
                class = TransformF32 
                folder = TransformF32
                // Three parameters are declared for each run: NB, IFFT and BITREV.
                // NOTE(review): the Params blocks below name the third value REV,
                // not BITREV - presumably values are matched by position; confirm.
                ParamList {
                  NB,IFFT,BITREV
                  Summary NB
                  Names "Number of samples,Inverse FFT, Bit Reversal"
                  Formula "NB"
                }
                // Input patterns (real and complex samples) and output buffers.
                Pattern INPUTR_F32_ID : RealSamples1_f32.txt 
                Pattern INPUTC_F32_ID : ComplexSamples1_f32.txt 
                Output  OUT_F32_ID : Output
                Output  STATE_F32_ID : Output
                // Parameter sets referenced from the Functions section.
                Params CFFT_PARAM_ID = {
                  NB = [16,64,128,256]
                  IFFT = [0,1]
                  REV = [0,1]
                }
                Params RFFT_PARAM_ID = {
                  NB = [32,64,128,256]
                  IFFT = [0,1]
                  REV = [1]
                }
                Params DCT_PARAM_ID = {
                  NB = [128,512,2048]
                  IFFT = [0]
                  REV = [1]
                }
                // Each entry associates a benchmark function with one parameter set.
                Functions {
                   test_cfft_f32:test_cfft_f32 -> CFFT_PARAM_ID
                   test_rfft_f32:test_rfft_f32 -> RFFT_PARAM_ID
                   test_dct4_f32:test_dct4_f32 -> DCT_PARAM_ID
                   test_cfft_radix4_f32:test_cfft_radix4_f32 -> CFFT_PARAM_ID
                   test_cfft_radix2_f32:test_cfft_radix2_f32 -> CFFT_PARAM_ID
                }
           }
           // Benchmark suite for the Q31 transform functions.
           // It is bound to the class TransformQ31 and the folder TransformQ31.
           suite Transform Q31 {
                class = TransformQ31 
                folder = TransformQ31
                // Three parameters are declared for each run: NB, IFFT and BITREV.
                // NOTE(review): the Params blocks below name the third value REV,
                // not BITREV - presumably values are matched by position; confirm.
                ParamList {
                  NB,IFFT,BITREV
                  Summary NB
                  Names "Number of samples,Inverse FFT, Bit Reversal"
                  Formula "NB"
                }
                // Input patterns (real and complex samples) and output buffers.
                Pattern INPUTR_Q31_ID : RealSamples1_q31.txt 
                Pattern INPUTC_Q31_ID : ComplexSamples1_q31.txt 
                Output  OUT_Q31_ID : Output
                Output  STATE_Q31_ID : Output
                // Parameter sets referenced from the Functions section.
                Params CFFT_PARAM_ID = {
                  NB = [16,64,128,256]
                  IFFT = [0,1]
                  REV = [0,1]
                }
                // NOTE(review): REV is [0,1] here, whereas RFFT_PARAM_ID in the
                // F32 and Q15 suites of this file uses REV = [1] - confirm intended.
                Params RFFT_PARAM_ID = {
                  NB = [32,64,128,256]
                  IFFT = [0,1]
                  REV = [0,1]
                }
                Params DCT_PARAM_ID = {
                  NB = [128,512,2048]
                  IFFT = [0]
                  REV = [1]
                }
                // Each entry associates a benchmark function with one parameter set.
                Functions {
                   test_cfft_q31:test_cfft_q31 -> CFFT_PARAM_ID
                   test_rfft_q31:test_rfft_q31 -> RFFT_PARAM_ID
                   test_dct4_q31:test_dct4_q31 -> DCT_PARAM_ID
                   test_cfft_radix4_q31:test_cfft_radix4_q31 -> CFFT_PARAM_ID
                   test_cfft_radix2_q31:test_cfft_radix2_q31 -> CFFT_PARAM_ID
                }
           }
           // Benchmark suite for the Q15 transform functions.
           // It is bound to the class TransformQ15 and the folder TransformQ15.
           suite Transform Q15 {
                class = TransformQ15 
                folder = TransformQ15
                // Three parameters are declared for each run: NB, IFFT and BITREV.
                // NOTE(review): the Params blocks below name the third value REV,
                // not BITREV - presumably values are matched by position; confirm.
                ParamList {
                  NB,IFFT,BITREV
                  Summary NB
                  Names "Number of samples,Inverse FFT, Bit Reversal"
                  Formula "NB"
                }
                // Input patterns (real and complex samples) and output buffers.
                Pattern INPUTR_Q15_ID : RealSamples1_q15.txt 
                Pattern INPUTC_Q15_ID : ComplexSamples1_q15.txt 
                Output  OUT_Q15_ID : Output
                Output  STATE_Q15_ID : Output
                // Parameter sets referenced from the Functions section.
                Params CFFT_PARAM_ID = {
                  NB = [16,64,128,256]
                  IFFT = [0,1]
                  REV = [0,1]
                }
                Params RFFT_PARAM_ID = {
                  NB = [32,64,128,256]
                  IFFT = [0,1]
                  REV = [1]
                }
                Params DCT_PARAM_ID = {
                  NB = [128,512,2048]
                  IFFT = [0]
                  REV = [1]
                }
                // Each entry associates a benchmark function with one parameter set.
                Functions {
                   test_cfft_q15:test_cfft_q15 -> CFFT_PARAM_ID
                   test_rfft_q15:test_rfft_q15 -> RFFT_PARAM_ID
                   test_dct4_q15:test_dct4_q15 -> DCT_PARAM_ID
                   test_cfft_radix4_q15:test_cfft_radix4_q15 -> CFFT_PARAM_ID
                   test_cfft_radix2_q15:test_cfft_radix2_q15 -> CFFT_PARAM_ID
                }
           }
        }
    }
    group NN Benchmarks
--- a/Testing/generateAllRegressions.bat
+++ b/Testing/generateAllRegressions.bat
@ -1,24 +1,26 @@
 echo "Basic Maths"
-python summaryBench.py -f bench.txt  BasicBenchmarks
+python summaryBench.py -f bench.txt  -r build_m7\result.txt BasicBenchmarks
 echo "Complex Maths"
-python summaryBench.py -f bench.txt  ComplexBenchmarks
+python summaryBench.py -f bench.txt  -r build_m7\result.txt ComplexBenchmarks
 echo "FIR"
-python summaryBench.py -f bench.txt  FIR
+python summaryBench.py -f bench.txt  -r build_m7\result.txt FIR
 echo "Convolution / Correlation"
-python summaryBench.py -f bench.txt  MISC
+python summaryBench.py -f bench.txt  -r build_m7\result.txt MISC
 echo "Decimation / Interpolation"
-python summaryBench.py -f bench.txt  DECIM
+python summaryBench.py -f bench.txt  -r build_m7\result.txt DECIM
 echo "BiQuad"
-python summaryBench.py -f bench.txt  BIQUAD
+python summaryBench.py -f bench.txt  -r build_m7\result.txt BIQUAD
 echo "Controller"
-python summaryBench.py -f bench.txt  Controller
+python summaryBench.py -f bench.txt  -r build_m7\result.txt Controller
 echo "Fast Math"
-python summaryBench.py -f bench.txt  FastMath
+python summaryBench.py -f bench.txt  -r build_m7\result.txt FastMath
 echo "Barycenter"
-python summaryBench.py -f bench.txt  SupportBarF32
+python summaryBench.py -f bench.txt  -r build_m7\result.txt SupportBarF32
 echo "Support"
-python summaryBench.py -f bench.txt  Support
+python summaryBench.py -f bench.txt  -r build_m7\result.txt Support
 echo "Unary Matrix"
-python summaryBench.py -f bench.txt  Unary 
+python summaryBench.py -f bench.txt  -r build_m7\result.txt Unary 
 echo "Binary Matrix"
-python summaryBench.py -f bench.txt  Binary
+python summaryBench.py -f bench.txt  -r build_m7\result.txt Binary
 echo "Transform"
 python summaryBench.py -f bench.txt  -r build_m7\result.txt Transform
--- a/Testing/runAllBenchmarks.bat
+++ b/Testing/runAllBenchmarks.bat
@ -0,0 +1,70 @@
@ECHO OFF 
 REM Runs the benchmark groups listed below, one after the other.
 REM For every group the same two steps are performed:
 REM   1. processTests.py is invoked on bench.txt with -e and the group name;
 REM   2. the :runBench subroutine defined at the end of this script is called.
 echo "Basic Maths"
 python processTests.py -f bench.txt -e BasicBenchmarks
 call:runBench
 echo "Complex Maths"
 python processTests.py -f bench.txt -e ComplexBenchmarks
 call:runBench
 echo "FIR"
 python processTests.py -f bench.txt -e FIR
 call:runBench
 echo "Convolution / Correlation"
 python processTests.py -f bench.txt -e MISC
 call:runBench
 echo "Decimation / Interpolation"
 python processTests.py -f bench.txt -e DECIM
 call:runBench
 echo "BiQuad"
 python processTests.py -f bench.txt -e BIQUAD
 call:runBench
 echo "Controller"
 python processTests.py -f bench.txt -e Controller
 call:runBench
 echo "Fast Math"
 python processTests.py -f bench.txt -e FastMath
 call:runBench
 echo "Barycenter"
 python processTests.py -f bench.txt -e SupportBarF32
 call:runBench
 echo "Support"
 python processTests.py -f bench.txt -e Support
 call:runBench
 echo "Unary Matrix"
 python processTests.py -f bench.txt -e Unary 
 call:runBench
 echo "Binary Matrix"
 python processTests.py -f bench.txt -e Binary
 call:runBench
 echo "Transform"
 python processTests.py -f bench.txt -e Transform
 call:runBench
 REM End of the main sequence: leave the script here so that execution
 REM does not fall through into the :runBench subroutine below.
 EXIT /B
 :runBench
 REM Subroutine: builds inside the build folder, launches the simulator with
 REM its output redirected to result.txt, then parses that file with
 REM processResult.py. The active configuration is build_a5 with the
 REM FVP_VE_Cortex-A5x1 model; the build_m7 and build_m0 variants are kept
 REM as commented-out alternatives. The folder name is used both by pushd
 REM and by the -r option of processResult.py, so change them together.
 REM Returns a non-zero exit code, without simulating or parsing, when the
 REM build folder cannot be entered or when make fails; otherwise a stale or
 REM missing result.txt would be parsed.
 REM pushd build_m7
 REM pushd build_m0
 pushd build_a5
 if errorlevel 1 (
   echo "Cannot enter build folder build_a5"
   exit /b 1
 )
 make
 if errorlevel 1 (
   echo "Build failed, skipping simulation and result parsing"
   popd
   exit /b 1
 )
 REM "C:\Program Files\ARM\Development Studio 2019.0\sw\models\bin\FVP_MPS2_Cortex-M7.exe" -a Testing > result.txt
 REM "C:\Program Files\ARM\Development Studio 2019.0\sw\models\bin\FVP_MPS2_Cortex-M0.exe" -a Testing > result.txt
 "C:\Program Files\ARM\Development Studio 2019.0\sw\models\bin\FVP_VE_Cortex-A5x1.exe" -a Testing  > result.txt
 popd
 echo "Parse result"
 REM python processResult.py -f bench.txt -e -r build_m7\result.txt
 REM python processResult.py -f bench.txt -e -r build_m0\result.txt
 python processResult.py -f bench.txt -e -r build_a5\result.txt
 goto:eof