CMSIS-DSP: Mean square error for q15, q31, f16, f32, f64.

Reworked q7 to have a bit more accuracy.
4 years ago · 8dcdb350a4
parent 47a987217f
commit 8dcdb350a4
62 changed files with 8788 additions and 7398 deletions
--- a/Include/dsp/statistics_functions.h
+++ b/Include/dsp/statistics_functions.h
@ -910,6 +910,66 @@ void arm_mse_q7(
        uint32_t blockSize,
        q7_t * pResult);

+/**
+  @brief         Mean square error between two Q15 vectors.
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[in]     blockSize  number of samples in each input vector
+  @param[out]    pResult    points to the location receiving the mean square error
+  @return        none
+*/
+  
+void arm_mse_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q15_t * pResult);
+
+/**
+  @brief         Mean square error between two Q31 vectors.
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[in]     blockSize  number of samples in each input vector
+  @param[out]    pResult    points to the location receiving the mean square error
+  @return        none
+*/
+  
+void arm_mse_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * pResult);
+
+/**
+  @brief         Mean square error between two single precision float vectors.
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[in]     blockSize  number of samples in each input vector
+  @param[out]    pResult    points to the location receiving the mean square error
+  @return        none
+*/
+  
+void arm_mse_f32(
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        uint32_t blockSize,
+        float32_t * pResult);
+
+/**
+  @brief         Mean square error between two double precision float vectors.
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[in]     blockSize  number of samples in each input vector
+  @param[out]    pResult    points to the location receiving the mean square error
+  @return        none
+*/
+  
+void arm_mse_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        uint32_t blockSize,
+        float64_t * pResult);
+
 #ifdef   __cplusplus
 }
 #endif
--- a/Include/dsp/statistics_functions_f16.h
+++ b/Include/dsp/statistics_functions_f16.h
@ -243,6 +243,21 @@ float16_t arm_kullback_leibler_f16(const float16_t * pSrcA
      uint32_t   blockSize,
      float16_t *pResult);

+/**
+  @brief         Mean square error between two half precision float vectors.
+  @param[in]     pSrcA      points to the first input vector
+  @param[in]     pSrcB      points to the second input vector
+  @param[in]     blockSize  number of samples in each input vector
+  @param[out]    pResult    points to the location receiving the mean square error
+  @return        none
+*/
+  
+void arm_mse_f16(
+  const float16_t * pSrcA,
+  const float16_t * pSrcB,
+        uint32_t blockSize,
+        float16_t * pResult);
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
--- a/Source/StatisticsFunctions/CMakeLists.txt
+++ b/Source/StatisticsFunctions/CMakeLists.txt
@ -81,6 +81,11 @@ target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q15.c)
 target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q31.c)
 target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q7.c)
 target_sources(CMSISDSPStatistics PRIVATE arm_mse_q7.c)
+target_sources(CMSISDSPStatistics PRIVATE arm_mse_q15.c)
+target_sources(CMSISDSPStatistics PRIVATE arm_mse_q31.c)
+target_sources(CMSISDSPStatistics PRIVATE arm_mse_f16.c)
+target_sources(CMSISDSPStatistics PRIVATE arm_mse_f32.c)
+target_sources(CMSISDSPStatistics PRIVATE arm_mse_f64.c)

 configLib(CMSISDSPStatistics ${ROOT})
 configDsp(CMSISDSPStatistics ${ROOT})
--- a/Source/StatisticsFunctions/StatisticsFunctions.c
+++ b/Source/StatisticsFunctions/StatisticsFunctions.c
@ -94,3 +94,7 @@
 #include "arm_absmin_no_idx_q31.c"
 #include "arm_absmin_no_idx_q7.c"
 #include "arm_mse_q7.c"
+#include "arm_mse_q15.c"
+#include "arm_mse_q31.c"
+#include "arm_mse_f32.c"
+#include "arm_mse_f64.c"
--- a/Source/StatisticsFunctions/StatisticsFunctionsF16.c
+++ b/Source/StatisticsFunctions/StatisticsFunctionsF16.c
@ -43,3 +43,4 @@
 #include "arm_absmin_f16.c"
 #include "arm_absmax_no_idx_f16.c"
 #include "arm_absmin_no_idx_f16.c"
+#include "arm_mse_f16.c"
--- a/Source/StatisticsFunctions/arm_mse_f16.c
+++ b/Source/StatisticsFunctions/arm_mse_f16.c
@ -0,0 +1,203 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_f16.c
+ * Description:  Half floating point mean square error
+ *
+ * $Date:        05 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two half floating point vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize   number of samples in input vector
+  @param[out]    result      mean square error
+  @return        none
+ */
+
+#if !defined(ARM_MATH_AUTOVECTORIZE)
+
+#if defined(ARM_MATH_MVE_FLOAT16)
+#include "arm_helium_utils.h"
+
+/*
+ * Helium (MVE) variant: accumulates (A[i] - B[i])^2 in eight half-precision
+ * lanes, reduces the lanes to a scalar and divides by blockSize.
+ */
+void arm_mse_f16(
+    const float16_t * pSrcA,
+    const float16_t * pSrcB,
+    uint32_t    blockSize,
+    float16_t * result)
+
+{
+    float16x8_t vecA, vecB;
+    float16x8_t vecSum;
+    uint32_t blkCnt;
+    _Float16 sum = 0.0f16;
+    vecSum = vdupq_n_f16(0.0f16);
+
+    /* Main loop: 8 samples per iteration */
+    blkCnt = (blockSize) >> 3;
+    while (blkCnt > 0U)
+    {
+        vecA = vld1q(pSrcA);
+        pSrcA += 8;
+
+        vecB = vld1q(pSrcB);
+        pSrcB += 8;
+
+        vecA = vsubq(vecA, vecB);
+
+        /* vecSum += (A - B) * (A - B), lane by lane */
+        vecSum = vfmaq(vecSum, vecA, vecA);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt --;
+    }
+
+
+    /* Tail: 1 to 7 remaining samples */
+    blkCnt = (blockSize) & 7;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+
+        /*
+         * Predicated loads: only the first blkCnt lanes are read from
+         * memory (the others are zeroed), so nothing past the end of the
+         * input buffers is accessed.
+         */
+        vecA = vld1q_z(pSrcA, p0);
+        vecB = vld1q_z(pSrcB, p0);
+
+        vecA = vsubq(vecA, vecB);
+        /* Only the active lanes are accumulated */
+        vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
+    }
+
+    sum = vecAddAcrossF16Mve(vecSum);
+
+    /* Store result in destination buffer */
+    *result = (_Float16)sum / (_Float16)blockSize;
+
+}
+
+#endif
+
+
+#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
+
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#if (!defined(ARM_MATH_MVE_FLOAT16)) || defined(ARM_MATH_AUTOVECTORIZE)
+
+
+
+/*
+ * Scalar variant: sums (A[i] - B[i])^2 in half precision and divides the
+ * total by blockSize.
+ */
+void arm_mse_f16(
+    const float16_t * pSrcA,
+    const float16_t * pSrcB,
+    uint32_t    blockSize,
+    float16_t * result)
+
+{
+  _Float16 acc = 0.0f16;    /* running sum of squared differences */
+  _Float16 delta;           /* difference of the current sample pair */
+  uint32_t remaining;       /* samples left for the one-at-a-time loop */
+#if defined (ARM_MATH_LOOPUNROLL)
+  uint32_t groups;          /* complete groups of 8 samples */
+
+  /* Unrolled part: 8 samples per iteration */
+  for (groups = blockSize >> 3; groups != 0U; groups--)
+  {
+    delta = (_Float16)pSrcA[0] - (_Float16)pSrcB[0];
+    acc += (_Float16)delta * (_Float16)delta;
+
+    delta = (_Float16)pSrcA[1] - (_Float16)pSrcB[1];
+    acc += (_Float16)delta * (_Float16)delta;
+
+    delta = (_Float16)pSrcA[2] - (_Float16)pSrcB[2];
+    acc += (_Float16)delta * (_Float16)delta;
+
+    delta = (_Float16)pSrcA[3] - (_Float16)pSrcB[3];
+    acc += (_Float16)delta * (_Float16)delta;
+
+    delta = (_Float16)pSrcA[4] - (_Float16)pSrcB[4];
+    acc += (_Float16)delta * (_Float16)delta;
+
+    delta = (_Float16)pSrcA[5] - (_Float16)pSrcB[5];
+    acc += (_Float16)delta * (_Float16)delta;
+
+    delta = (_Float16)pSrcA[6] - (_Float16)pSrcB[6];
+    acc += (_Float16)delta * (_Float16)delta;
+
+    delta = (_Float16)pSrcA[7] - (_Float16)pSrcB[7];
+    acc += (_Float16)delta * (_Float16)delta;
+
+    pSrcA += 8;
+    pSrcB += 8;
+  }
+
+  /* Whatever does not fill a group of 8 */
+  remaining = blockSize & 7;
+#else
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+#endif
+
+  for (; remaining != 0U; remaining--)
+  {
+    delta = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
+    acc += (_Float16)delta * (_Float16)delta;
+  }
+
+  /* Mean of the accumulated squared differences */
+  *result = (_Float16)acc / (_Float16)blockSize;
+}
+
+#endif /* end of test for vector instruction availability */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
+/**
+  @} end of MSE group
+ */
--- a/Source/StatisticsFunctions/arm_mse_f32.c
+++ b/Source/StatisticsFunctions/arm_mse_f32.c
@ -0,0 +1,246 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_f32.c
+ * Description:  Floating point mean square error
+ *
+ * $Date:        05 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two floating point vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize   number of samples in input vector
+  @param[out]    result      mean square error
+  @return        none
+ */
+
+#if !defined(ARM_MATH_AUTOVECTORIZE)
+
+#if defined(ARM_MATH_MVEF)
+#include "arm_helium_utils.h"
+
+/*
+ * Helium (MVE) variant: accumulates (A[i] - B[i])^2 in four single-precision
+ * lanes, reduces the lanes to a scalar and divides by blockSize.
+ */
+void arm_mse_f32(
+    const float32_t * pSrcA,
+    const float32_t * pSrcB,
+    uint32_t    blockSize,
+    float32_t * result)
+
+{
+    float32x4_t vecA, vecB;
+    float32x4_t vecSum;
+    uint32_t blkCnt;
+    float32_t sum = 0.0f;
+    vecSum = vdupq_n_f32(0.0f);
+
+    /* Compute 4 outputs at a time */
+    blkCnt = (blockSize) >> 2;
+    while (blkCnt > 0U)
+    {
+        vecA = vld1q(pSrcA);
+        pSrcA += 4;
+
+        vecB = vld1q(pSrcB);
+        pSrcB += 4;
+
+        vecA = vsubq(vecA, vecB);
+
+        /* vecSum += (A - B) * (A - B), lane by lane */
+        vecSum = vfmaq(vecSum, vecA, vecA);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt --;
+    }
+
+
+    /* Tail: 1 to 3 remaining samples */
+    blkCnt = (blockSize) & 3;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp32q(blkCnt);
+
+        /*
+         * Predicated loads: only the first blkCnt lanes are read from
+         * memory (the others are zeroed), so nothing past the end of the
+         * input buffers is accessed.
+         */
+        vecA = vld1q_z(pSrcA, p0);
+        vecB = vld1q_z(pSrcB, p0);
+
+        vecA = vsubq(vecA, vecB);
+        /* Only the active lanes are accumulated */
+        vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
+    }
+
+    sum = vecAddAcrossF32Mve(vecSum);
+
+    /* Store result in destination buffer */
+    *result = sum / blockSize;
+
+}
+
+#endif
+
+#if defined(ARM_MATH_NEON) 
+/*
+ * Neon variant: accumulates (A[i] - B[i])^2 four lanes at a time, reduces the
+ * lanes to a scalar, handles the last 0 to 3 samples in scalar code and
+ * divides the total by blockSize.
+ */
+void arm_mse_f32(
+    const float32_t * pSrcA,
+    const float32_t * pSrcB,
+    uint32_t    blockSize,
+    float32_t * result)
+
+{
+    float32x4_t vecA, vecB;
+    float32x4_t vecSum;
+    uint32_t blkCnt;
+    float32_t inA, inB;      /* scalar operands used by the tail loop */
+    float32_t sum = 0.0f;
+    vecSum = vdupq_n_f32(0.0f);
+#if !defined(__aarch64__)
+    f32x2_t tmp = vdup_n_f32(0.0f);
+#endif
+
+    /* Compute 4 outputs at a time */
+    blkCnt = (blockSize) >> 2;
+    while (blkCnt > 0U)
+    {
+        vecA = vld1q_f32(pSrcA);
+        pSrcA += 4;
+
+        vecB = vld1q_f32(pSrcB);
+        pSrcB += 4;
+
+        vecA = vsubq_f32(vecA, vecB);
+
+        /* vecSum += (A - B) * (A - B), lane by lane */
+        vecSum = vfmaq_f32(vecSum, vecA, vecA);
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt --;
+    }
+
+    /* Horizontal add of the four partial sums */
+#if defined(__aarch64__)
+    sum = vpadds_f32(vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum)));
+#else
+    tmp = vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum));
+    sum = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1);
+
+#endif
+
+    /* Tail: remaining 0 to 3 samples, one at a time */
+    blkCnt = (blockSize) & 3;
+    while (blkCnt > 0U)
+    {
+        /* Accumulate the squared difference of the current sample pair */
+        inA = *pSrcA++;
+        inB = *pSrcB++;
+        inA = inA - inB;
+        sum += inA * inA;
+
+        /* Decrement loop counter */
+        blkCnt--;
+    }
+
+    /* Store result in destination buffer */
+    *result = sum / blockSize;
+
+}
+#endif
+
+#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
+
+
+
+#if (!defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON)) || defined(ARM_MATH_AUTOVECTORIZE)
+
+
+/*
+ * Scalar variant: sums (A[i] - B[i])^2 in single precision and divides the
+ * total by blockSize.
+ */
+void arm_mse_f32(
+    const float32_t * pSrcA,
+    const float32_t * pSrcB,
+    uint32_t    blockSize,
+    float32_t * result)
+
+{
+  float32_t acc = 0.0f;     /* running sum of squared differences */
+  float32_t delta;          /* difference of the current sample pair */
+  uint32_t remaining;       /* samples left for the one-at-a-time loop */
+#if defined (ARM_MATH_LOOPUNROLL)
+  uint32_t groups;          /* complete groups of 4 samples */
+
+  /* Unrolled part: 4 samples per iteration */
+  for (groups = blockSize >> 2; groups != 0U; groups--)
+  {
+    delta = pSrcA[0] - pSrcB[0];
+    acc += delta * delta;
+
+    delta = pSrcA[1] - pSrcB[1];
+    acc += delta * delta;
+
+    delta = pSrcA[2] - pSrcB[2];
+    acc += delta * delta;
+
+    delta = pSrcA[3] - pSrcB[3];
+    acc += delta * delta;
+
+    pSrcA += 4;
+    pSrcB += 4;
+  }
+
+  /* Whatever does not fill a group of 4 */
+  remaining = blockSize & 3;
+#else
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+#endif
+
+  for (; remaining != 0U; remaining--)
+  {
+    delta = *pSrcA++ - *pSrcB++;
+    acc += delta * delta;
+  }
+
+  /* Mean of the accumulated squared differences */
+  *result = acc / blockSize;
+}
+
+#endif /* end of test for vector instruction availability */
+
+/**
+  @} end of MSE group
+ */
--- a/Source/StatisticsFunctions/arm_mse_f64.c
+++ b/Source/StatisticsFunctions/arm_mse_f64.c
@ -0,0 +1,110 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_f64.c
+ * Description:  Double floating point mean square error
+ *
+ * $Date:        05 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two double floating point vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize   number of samples in input vector
+  @param[out]    result      mean square error
+  @return        none
+ */
+
+
+
+
+
+/*
+ * Sums (A[i] - B[i])^2 in double precision and divides the total by
+ * blockSize.
+ */
+void arm_mse_f64(
+    const float64_t * pSrcA,
+    const float64_t * pSrcB,
+    uint32_t    blockSize,
+    float64_t * result)
+
+{
+  float64_t acc = 0.0;      /* running sum of squared differences */
+  float64_t delta;          /* difference of the current sample pair */
+  uint32_t remaining;       /* samples left for the one-at-a-time loop */
+#if defined (ARM_MATH_LOOPUNROLL)
+  uint32_t pairs;           /* complete groups of 2 samples */
+
+  /* Unrolled part: 2 samples per iteration */
+  for (pairs = blockSize >> 1; pairs != 0U; pairs--)
+  {
+    delta = pSrcA[0] - pSrcB[0];
+    acc += delta * delta;
+
+    delta = pSrcA[1] - pSrcB[1];
+    acc += delta * delta;
+
+    pSrcA += 2;
+    pSrcB += 2;
+  }
+
+  /* At most one sample is left over */
+  remaining = blockSize & 1;
+#else
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+#endif
+
+  for (; remaining != 0U; remaining--)
+  {
+    delta = *pSrcA++ - *pSrcB++;
+    acc += delta * delta;
+  }
+
+  /* Mean of the accumulated squared differences */
+  *result = acc / blockSize;
+}
+
+
+/**
+  @} end of MSE group
+ */
--- a/Source/StatisticsFunctions/arm_mse_q15.c
+++ b/Source/StatisticsFunctions/arm_mse_q15.c
@ -0,0 +1,175 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_q15.c
+ * Description:  Mean square error between two Q15 vectors
+ *
+ * $Date:        04 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two Q15 vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize   number of samples in input vector
+  @param[out]    pResult     mean square error
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+/*
+ * Helium (MVE) variant. Each input is shifted right by one bit before the
+ * saturating subtraction, the squared differences are accumulated in a
+ * 64-bit sum, and the averaged sum is shifted right by 13 and saturated to
+ * 16 bits.
+ */
+void arm_mse_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+    uint32_t  blkCnt;           /* loop counters */
+    q15x8_t vecSrcA,vecSrcB;
+    q63_t   sum = 0LL;
+
+    /* Main loop: 8 samples per iteration */
+    blkCnt = blockSize >> 3U;
+    while (blkCnt > 0U)
+    {
+        vecSrcA = vld1q(pSrcA);
+        vecSrcB = vld1q(pSrcB);
+
+        /* Halve both operands before subtracting */
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);
+
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);
+        /*
+         * sum lanes
+         */
+        sum = vmlaldavaq(sum, vecSrcA, vecSrcA);
+
+        blkCnt--;
+        pSrcA += 8;
+        pSrcB += 8;
+    }
+
+    /*
+     * tail
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp16q(blkCnt);
+
+        /*
+         * Predicated loads: only the first blkCnt lanes are read from
+         * memory (the others are zeroed), so nothing past the end of the
+         * input buffers is accessed.
+         */
+        vecSrcA = vld1q_z(pSrcA, p0);
+        vecSrcB = vld1q_z(pSrcB, p0);
+
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);
+
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);
+
+        /* Only the active lanes are accumulated */
+        sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0);
+    }
+
+
+
+    /* Average, rescale (>> 13) and saturate to 16 bits */
+    *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
+}
+#else
+/*
+ * Scalar variant. Each input is shifted right by one bit before the
+ * saturating subtraction, the squared differences are accumulated in a
+ * 64-bit sum, and the averaged sum is shifted right by 13 and saturated to
+ * 16 bits.
+ */
+void arm_mse_q15(
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+  q63_t acc = 0;            /* 64-bit sum of squared differences */
+  q31_t mean;               /* averaged sum, before the final scaling */
+  q15_t halfA, halfB;       /* inputs after the one-bit right shift */
+  q15_t diff;               /* saturated difference halfA - halfB */
+  uint32_t remaining;       /* samples left for the one-at-a-time loop */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  uint32_t groups;          /* complete groups of 4 samples */
+
+  /* Unrolled part: 4 samples per iteration */
+  for (groups = blockSize >> 2U; groups != 0U; groups--)
+  {
+    halfA = pSrcA[0] >> 1;
+    halfB = pSrcB[0] >> 1;
+    diff = (q15_t) __SSAT(((q31_t) halfA - (q31_t) halfB), 16);
+    acc += (q63_t)((q31_t) diff * diff);
+
+    halfA = pSrcA[1] >> 1;
+    halfB = pSrcB[1] >> 1;
+    diff = (q15_t) __SSAT(((q31_t) halfA - (q31_t) halfB), 16);
+    acc += (q63_t)((q31_t) diff * diff);
+
+    halfA = pSrcA[2] >> 1;
+    halfB = pSrcB[2] >> 1;
+    diff = (q15_t) __SSAT(((q31_t) halfA - (q31_t) halfB), 16);
+    acc += (q63_t)((q31_t) diff * diff);
+
+    halfA = pSrcA[3] >> 1;
+    halfB = pSrcB[3] >> 1;
+    diff = (q15_t) __SSAT(((q31_t) halfA - (q31_t) halfB), 16);
+    acc += (q63_t)((q31_t) diff * diff);
+
+    pSrcA += 4;
+    pSrcB += 4;
+  }
+
+  /* Whatever does not fill a group of 4 */
+  remaining = blockSize % 0x4U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  for (; remaining != 0U; remaining--)
+  {
+    halfA = *pSrcA++ >> 1;
+    halfB = *pSrcB++ >> 1;
+    diff = (q15_t) __SSAT(((q31_t) halfA - (q31_t) halfB), 16);
+    acc += (q63_t)((q31_t) diff * diff);
+  }
+
+  /* Average, rescale (>> 13) and saturate to 16 bits */
+  mean = (q31_t) (acc / blockSize);
+  *pResult = (q15_t) __SSAT(mean >> 13, 16);
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+  @} end of MSE group
+ */
--- a/Source/StatisticsFunctions/arm_mse_q31.c
+++ b/Source/StatisticsFunctions/arm_mse_q31.c
@ -0,0 +1,176 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mse_q31.c
+ * Description:  Mean square error between two Q31 vectors
+ *
+ * $Date:        04 April 2022
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup MSE
+  @{
+ */
+
+/**
+  @brief         Mean square error between two Q31 vectors.
+  @param[in]     pSrcA       points to the first input vector
+  @param[in]     pSrcB       points to the second input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    mean square error
+  @return        none
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+/*
+ * Helium (MVE) variant. Each input is shifted right by one bit before the
+ * saturating subtraction, the squared differences are accumulated with
+ * vrmlaldavhaq, and the averaged sum is shifted right by 21.
+ */
+void arm_mse_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+    uint32_t  blkCnt;           /* loop counters */
+    q31x4_t vecSrcA,vecSrcB;
+    q63_t   sum = 0LL;
+
+   /* Compute 4 outputs at a time */
+    blkCnt = blockSize >> 2U;
+    while (blkCnt > 0U)
+    {
+        vecSrcA = vld1q(pSrcA);
+        vecSrcB = vld1q(pSrcB);
+
+        /* Halve both operands before subtracting */
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);
+
+
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);
+        /*
+         * sum lanes
+         */
+        sum = vrmlaldavhaq(sum, vecSrcA, vecSrcA);
+
+        blkCnt--;
+        pSrcA += 4;
+        pSrcB += 4;
+    }
+
+    /*
+     * tail
+     */
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0U)
+    {
+        mve_pred16_t p0 = vctp32q(blkCnt);
+
+        /*
+         * Predicated loads: only the first blkCnt lanes are read from
+         * memory (the others are zeroed), so nothing past the end of the
+         * input buffers is accessed.
+         */
+        vecSrcA = vld1q_z(pSrcA, p0);
+        vecSrcB = vld1q_z(pSrcB, p0);
+
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);
+
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);
+
+        /* Only the active lanes are accumulated */
+        sum = vrmlaldavhaq_p(sum, vecSrcA, vecSrcA, p0);
+    }
+
+
+    /* Average and rescale (>> 21) */
+    *pResult = (q31_t) ((sum / blockSize)>>21);
+
+}
+#else
+/*
+ * Scalar variant. Each input is shifted right by one bit before the
+ * saturating subtraction; every squared difference is shifted right by 14
+ * before it is added to the 64-bit sum, and the averaged sum is shifted
+ * right by 15.
+ */
+void arm_mse_q31(
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+  q63_t acc = 0;            /* 64-bit sum of scaled squared differences */
+  q31_t halfA, halfB;       /* inputs after the one-bit right shift */
+  q31_t diff;               /* saturated difference halfA - halfB */
+  uint32_t remaining;       /* samples left for the one-at-a-time loop */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  uint32_t groups;          /* complete groups of 4 samples */
+
+  /* Unrolled part: 4 samples per iteration */
+  for (groups = blockSize >> 2U; groups != 0U; groups--)
+  {
+    halfA = pSrcA[0] >> 1;
+    halfB = pSrcB[0] >> 1;
+    diff = __QSUB(halfA, halfB);
+    acc += ((q63_t) diff * diff) >> 14U;
+
+    halfA = pSrcA[1] >> 1;
+    halfB = pSrcB[1] >> 1;
+    diff = __QSUB(halfA, halfB);
+    acc += ((q63_t) diff * diff) >> 14U;
+
+    halfA = pSrcA[2] >> 1;
+    halfB = pSrcB[2] >> 1;
+    diff = __QSUB(halfA, halfB);
+    acc += ((q63_t) diff * diff) >> 14U;
+
+    halfA = pSrcA[3] >> 1;
+    halfB = pSrcB[3] >> 1;
+    diff = __QSUB(halfA, halfB);
+    acc += ((q63_t) diff * diff) >> 14U;
+
+    pSrcA += 4;
+    pSrcB += 4;
+  }
+
+  /* Whatever does not fill a group of 4 */
+  remaining = blockSize % 0x4U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  for (; remaining != 0U; remaining--)
+  {
+    halfA = *pSrcA++ >> 1;
+    halfB = *pSrcB++ >> 1;
+    diff = __QSUB(halfA, halfB);
+    acc += ((q63_t) diff * diff) >> 14U;
+  }
+
+  /* Average and rescale (>> 15) */
+  *pResult = (q31_t) ((acc / blockSize)>>15);
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+  @} end of MSE group
+ */
--- a/Source/StatisticsFunctions/arm_mse_q7.c
+++ b/Source/StatisticsFunctions/arm_mse_q7.c
@ -33,14 +33,14 @@
 */

 /**
-  @defgroup mse Mean Square Error
+  @defgroup MSE Mean Square Error

  Calculates the mean square error between two vectors.

 */

 /**
-  @addtogroup mse
+  @addtogroup MSE
  @{
 */

@ -48,11 +48,10 @@
  @brief         Mean square error between two Q7 vectors.
  @param[in]     pSrcA       points to the first input vector
  @param[in]     pSrcB       points to the second input vector
-  @param[in]     blockSize  number of samples in input vector
-  @param[out]    pResult    mean square error
+  @param[in]     blockSize   number of samples in input vector
+  @param[out]    pResult     mean square error
  @return        none
-
-*/
+ */
 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 void arm_mse_q7(
  const q7_t * pSrcA,
@ -63,14 +62,16 @@ void arm_mse_q7(
    uint32_t  blkCnt;           /* loop counters */
    q7x16_t vecSrcA,vecSrcB;
    q31_t   sum = 0LL;
-    q7_t inA,inB;

   /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4U;
    while (blkCnt > 0U)
    {
-        vecSrcA = vldrbq_s8(pSrcA);
-        vecSrcB = vldrbq_s8(pSrcB);
+        vecSrcA = vld1q(pSrcA);
+        vecSrcB = vld1q(pSrcB);
+
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);

        vecSrcA = vqsubq(vecSrcA,vecSrcB);
        /*
@ -87,23 +88,21 @@ void arm_mse_q7(
     * tail
     */
    blkCnt = blockSize & 0xF;
-    while (blkCnt > 0U)
+    if (blkCnt > 0U)
    {
-       /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
-       /* Compute Power and store result in a temporary variable, sum. */
-       inA = *pSrcA++;
-       inB = *pSrcB++;
+        mve_pred16_t p0 = vctp8q(blkCnt);
+        vecSrcA = vld1q(pSrcA);
+        vecSrcB = vld1q(pSrcB);

-       inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
+        vecSrcA = vshrq(vecSrcA,1);
+        vecSrcB = vshrq(vecSrcB,1);

-       sum += ((q15_t) inA * inA);
+        vecSrcA = vqsubq(vecSrcA,vecSrcB);

-       /* Decrement loop counter */
-       blkCnt--;
+        sum = vmladavaq_p(sum, vecSrcA, vecSrcA, p0);
    }

-    *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>7, 8);
+    *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);
 }
 #else
 void arm_mse_q7(
@ -116,10 +115,6 @@ void arm_mse_q7(
        q31_t sum = 0;                                 /* Temporary result storage */
        q7_t inA,inB;                                       /* Temporary variable to store input value */

-#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
-        q31_t inA32,inB32;                                    /* Temporary variable to store packed input value */
-        q31_t in1, in2;                                /* Temporary variables to store input value */
-#endif

 #if defined (ARM_MATH_LOOPUNROLL)

@ -128,42 +123,25 @@ void arm_mse_q7(

  while (blkCnt > 0U)
  {
-    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
-    /* Compute Power and store result in a temporary variable, sum. */
-#if defined (ARM_MATH_DSP)
-    inA32 = read_q7x4_ia ((q7_t **) &pSrcA);
-    inB32 = read_q7x4_ia ((q7_t **) &pSrcB);
-
-    inA32 = __QSUB8(inA32, inB32);
-
-    in1 = __SXTB16(__ROR(inA32, 8));
-    in2 = __SXTB16(inA32);
-
-    /* calculate power and accumulate to accumulator */
-    sum = __SMLAD(in1, in1, sum);
-    sum = __SMLAD(in2, in2, sum);
-#else
-    inA = *pSrcA++;
-    inB = *pSrcB++;
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
    sum += ((q15_t) inA * inA);

-    inA = *pSrcA++;
-    inB = *pSrcB++;
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
    sum += ((q15_t) inA * inA);

-    inA = *pSrcA++;
-    inB = *pSrcB++;
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
    sum += ((q15_t) inA * inA);

-    inA = *pSrcA++;
-    inB = *pSrcB++;
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;
    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
    sum += ((q15_t) inA * inA);
-#endif /* #if defined (ARM_MATH_DSP) */

    /* Decrement loop counter */
    blkCnt--;
@ -181,11 +159,8 @@ void arm_mse_q7(

  while (blkCnt > 0U)
  {
-    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
-    /* Compute Power and store result in a temporary variable, sum. */
-    inA = *pSrcA++;
-    inB = *pSrcB++;
+    inA = *pSrcA++ >> 1;
+    inB = *pSrcB++ >> 1;

    inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
    sum += ((q15_t) inA * inA);
@ -195,10 +170,10 @@ void arm_mse_q7(
  }

  /* Store result in q7 format */
-  *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>7, 8);;
+  *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);;
 }
 #endif /* defined(ARM_MATH_MVEI) */

 /**
-  @} end of power group
+  @} end of MSE group
 */
--- a/Testing/PatternGeneration/Stats.py
+++ b/Testing/PatternGeneration/Stats.py
@ -477,7 +477,8 @@ def writeTests(config,nb,format):
 # So new tests have to be added after existing ones
 def writeNewsTests(config,nb,format):
    NBSAMPLES = 300
-    #config.setOverwrite(True)
+    if format==Tools.F16:
+       config.setOverwrite(True)
    data1=np.random.randn(NBSAMPLES)
    data1 = Tools.normalize(data1)

@ -491,7 +492,7 @@ def writeNewsTests(config,nb,format):

    config.writeInput(2, data2,"InputNew")
    nb=generateOperatorTests(config,nb,format,data1,data2,mseTest,"MSEVals")
-    #config.setOverwrite(False)
+    config.setOverwrite(False)


 def generateBenchmark(config,format):
--- a/Testing/Patterns/DSP/Stats/StatsF16/AbsMaxIndexes26_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/AbsMaxIndexes26_s16.txt
@ -1,8 +1,8 @@
 H
 3
-// 4
-0x0004
-// 4
-0x0004
-// 4
-0x0004
+// 6
+0x0006
+// 6
+0x0006
+// 18
+0x0012
--- a/Testing/Patterns/DSP/Stats/StatsF16/AbsMaxVals26_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/AbsMaxVals26_f16.txt
@ -1,8 +1,8 @@
 H
 3
-// 0.423138
-0x36c5
-// 0.423138
-0x36c5
-// 0.423138
-0x36c5
+// 0.640755
+0x3920
+// 0.640755
+0x3920
+// 0.887109
+0x3b19
--- a/Testing/Patterns/DSP/Stats/StatsF16/AbsMinIndexes27_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/AbsMinIndexes27_s16.txt
@ -2,7 +2,7 @@ H
 3
 // 0
 0x0000
-// 15
-0x000F
-// 15
-0x000F
+// 7
+0x0007
+// 19
+0x0013
--- a/Testing/Patterns/DSP/Stats/StatsF16/AbsMinVals27_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/AbsMinVals27_f16.txt
@ -1,8 +1,8 @@
 H
 3
-// 0.027578
-0x270f
-// 0.007974
-0x2015
-// 0.007974
-0x2015
+// 0.107198
+0x2edc
+// 0.021092
+0x2566
+// 0.002011
+0x181e
--- a/Testing/Patterns/DSP/Stats/StatsF16/InputNew1_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/InputNew1_f16.txt
--- a/Testing/Patterns/DSP/Stats/StatsF16/InputNew2_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/InputNew2_f16.txt
--- a/Testing/Patterns/DSP/Stats/StatsF16/MSEVals28_f16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF16/MSEVals28_f16.txt
@ -1,10 +1,10 @@
 H
 4
-// 0.038705
-0x28f4
-// 0.092517
-0x2dec
-// 0.106867
-0x2ed7
-// 0.225679
-0x3339
+// 0.211855
+0x32c8
+// 0.182973
+0x31db
+// 0.268630
+0x344c
+// 0.234421
+0x3380
--- a/Testing/Patterns/DSP/Stats/StatsF32/AbsMaxIndexes26_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF32/AbsMaxIndexes26_s16.txt
@ -1,8 +1,8 @@
 H
 3
-// 1
-0x0001
-// 1
-0x0001
-// 1
-0x0001
+// 0
+0x0000
+// 7
+0x0007
+// 7
+0x0007
--- a/Testing/Patterns/DSP/Stats/StatsF32/AbsMaxVals26_f32.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF32/AbsMaxVals26_f32.txt
@ -1,8 +1,8 @@
 W
 3
-// 0.476185
-0x3ef3ce78
-// 0.476185
-0x3ef3ce78
-// 0.476185
-0x3ef3ce78
+// 0.725166
+0x3f39a47a
+// 0.817687
+0x3f5153ed
+// 0.817687
+0x3f5153ed
--- a/Testing/Patterns/DSP/Stats/StatsF32/AbsMinIndexes27_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF32/AbsMinIndexes27_s16.txt
@ -1,8 +1,8 @@
 H
 3
-// 0
-0x0000
-// 7
-0x0007
-// 7
-0x0007
+// 1
+0x0001
+// 5
+0x0005
+// 9
+0x0009
--- a/Testing/Patterns/DSP/Stats/StatsF32/AbsMinVals27_f32.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF32/AbsMinVals27_f32.txt
@ -1,8 +1,8 @@
 W
 3
-// 0.184919
-0x3e3d5b69
-// 0.008792
-0x3c100d1c
-// 0.008792
-0x3c100d1c
+// 0.198876
+0x3e4ba63c
+// 0.035481
+0x3d1154a3
+// 0.034200
+0x3d0c1510
--- a/Testing/Patterns/DSP/Stats/StatsF32/InputNew1_f32.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF32/InputNew1_f32.txt
--- a/Testing/Patterns/DSP/Stats/StatsF32/InputNew2_f32.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF32/InputNew2_f32.txt
--- a/Testing/Patterns/DSP/Stats/StatsF32/MSEVals28_f32.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF32/MSEVals28_f32.txt
@ -1,10 +1,10 @@
 W
 4
-// 0.125231
-0x3e003c73
-// 0.122919
-0x3dfbbceb
-// 0.145740
-0x3e153cd2
-// 0.189820
-0x3e426031
+// 0.072747
+0x3d94fc3e
+// 0.176808
+0x3e350d0d
+// 0.207669
+0x3e54a726
+// 0.183645
+0x3e3c0d87
--- a/Testing/Patterns/DSP/Stats/StatsF64/AbsMaxIndexes26_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF64/AbsMaxIndexes26_s16.txt
@ -2,7 +2,7 @@ H
 3
 // 1
 0x0001
-// 2
-0x0002
-// 2
-0x0002
+// 3
+0x0003
+// 3
+0x0003
--- a/Testing/Patterns/DSP/Stats/StatsF64/AbsMaxVals26_f64.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF64/AbsMaxVals26_f64.txt
@ -1,8 +1,8 @@
 D
 3
-// 0.203055
-0x3fc9fdb6e0c81ee0
-// 0.360222
-0x3fd70de0df777efb
-// 0.360222
-0x3fd70de0df777efb
+// 0.579795
+0x3fe28dad67519d3d
+// 0.783610
+0x3fe91356237f16f6
+// 0.783610
+0x3fe91356237f16f6
--- a/Testing/Patterns/DSP/Stats/StatsF64/AbsMinIndexes27_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF64/AbsMinIndexes27_s16.txt
@ -4,5 +4,5 @@ H
 0x0000
 // 0
 0x0000
-// 0
-0x0000
+// 4
+0x0004
--- a/Testing/Patterns/DSP/Stats/StatsF64/AbsMinVals27_f64.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF64/AbsMinVals27_f64.txt
@ -1,8 +1,8 @@
 D
 3
-// 0.003692
-0x3f6e3f80ef9e8a83
-// 0.003692
-0x3f6e3f80ef9e8a83
-// 0.003692
-0x3f6e3f80ef9e8a83
+// 0.310923
+0x3fd3e6286ed8195c
+// 0.310923
+0x3fd3e6286ed8195c
+// 0.150640
+0x3fc34828d25e0053
--- a/Testing/Patterns/DSP/Stats/StatsF64/InputNew1_f64.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF64/InputNew1_f64.txt
--- a/Testing/Patterns/DSP/Stats/StatsF64/InputNew2_f64.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF64/InputNew2_f64.txt
--- a/Testing/Patterns/DSP/Stats/StatsF64/MSEVals28_f64.txt
+++ b/Testing/Patterns/DSP/Stats/StatsF64/MSEVals28_f64.txt
@ -1,10 +1,10 @@
 D
 4
-// 0.001072
-0x3f518f8a7ed015a2
-// 0.073015
-0x3fb2b11b5caa023a
-// 0.060567
-0x3faf02a5beb935ad
-// 0.198414
-0x3fc9659ffa60ff3b
+// 0.221944
+0x3fcc68ab519cbb08
+// 0.487606
+0x3fdf34ef9e2840ea
+// 0.411797
+0x3fda5ae1181a5066
+// 0.186577
+0x3fc7e1bdbcffc958
--- a/Testing/Patterns/DSP/Stats/StatsQ15/AbsMaxIndexes8_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ15/AbsMaxIndexes8_s16.txt
@ -1,8 +1,8 @@
 H
 3
-// 4
-0x0004
-// 4
-0x0004
-// 18
-0x0012
+// 5
+0x0005
+// 15
+0x000F
+// 15
+0x000F
--- a/Testing/Patterns/DSP/Stats/StatsQ15/AbsMaxVals8_q15.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ15/AbsMaxVals8_q15.txt
@ -1,8 +1,8 @@
 H
 3
-// 0.540886
-0x453C
-// 0.540886
-0x453C
-// 0.701466
-0x59CA
+// 0.511444
+0x4177
+// 0.572485
+0x4947
+// 0.572485
+0x4947
--- a/Testing/Patterns/DSP/Stats/StatsQ15/AbsMinIndexes9_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ15/AbsMinIndexes9_s16.txt
@ -1,8 +1,8 @@
 H
 3
-// 6
-0x0006
-// 6
-0x0006
-// 6
-0x0006
+// 1
+0x0001
+// 1
+0x0001
+// 1
+0x0001
--- a/Testing/Patterns/DSP/Stats/StatsQ15/AbsMinVals9_q15.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ15/AbsMinVals9_q15.txt
@ -1,8 +1,8 @@
 H
 3
-// 0.003012
-0x0063
-// 0.003012
-0x0063
-// 0.003012
-0x0063
+// 0.065882
+0x086F
+// 0.065882
+0x086F
+// 0.065882
+0x086F
--- a/Testing/Patterns/DSP/Stats/StatsQ15/InputNew1_q15.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ15/InputNew1_q15.txt
--- a/Testing/Patterns/DSP/Stats/StatsQ15/InputNew2_q15.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ15/InputNew2_q15.txt
--- a/Testing/Patterns/DSP/Stats/StatsQ15/MSEVals10_q15.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ15/MSEVals10_q15.txt
@ -1,10 +1,10 @@
 H
 4
-// 0.291384
-0x254C
-// 0.326840
-0x29D6
-// 0.266990
-0x222D
-// 0.278624
-0x23AA
+// 0.123046
+0x0FC0
+// 0.134261
+0x112F
+// 0.135165
+0x114D
+// 0.237464
+0x1E65
--- a/Testing/Patterns/DSP/Stats/StatsQ31/AbsMaxIndexes8_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ31/AbsMaxIndexes8_s16.txt
@ -1,8 +1,8 @@
 H
 3
-// 1
-0x0001
-// 3
-0x0003
-// 8
-0x0008
+// 2
+0x0002
+// 7
+0x0007
+// 7
+0x0007
--- a/Testing/Patterns/DSP/Stats/StatsQ31/AbsMaxVals8_q31.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ31/AbsMaxVals8_q31.txt
@ -1,8 +1,8 @@
 W
 3
-// 0.352374
-0x2D1A96B5
-// 0.530170
-0x43DC9BE7
-// 0.634745
-0x513F5458
+// 0.254671
+0x20990B68
+// 0.516980
+0x422C699D
+// 0.516980
+0x422C699D
--- a/Testing/Patterns/DSP/Stats/StatsQ31/AbsMinIndexes9_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ31/AbsMinIndexes9_s16.txt
@ -2,7 +2,7 @@ H
 3
 // 0
 0x0000
-// 7
-0x0007
-// 7
-0x0007
+// 4
+0x0004
+// 4
+0x0004
--- a/Testing/Patterns/DSP/Stats/StatsQ31/AbsMinVals9_q31.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ31/AbsMinVals9_q31.txt
@ -1,8 +1,8 @@
 W
 3
-// 0.132805
-0x10FFBE95
-// 0.003898
-0x007FB95F
-// 0.003898
-0x007FB95F
+// 0.053227
+0x06D0231F
+// 0.003305
+0x006C4DD3
+// 0.003305
+0x006C4DD3
--- a/Testing/Patterns/DSP/Stats/StatsQ31/InputNew1_q31.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ31/InputNew1_q31.txt
--- a/Testing/Patterns/DSP/Stats/StatsQ31/InputNew2_q31.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ31/InputNew2_q31.txt
--- a/Testing/Patterns/DSP/Stats/StatsQ31/MSEVals10_q31.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ31/MSEVals10_q31.txt
@ -1,10 +1,10 @@
 W
 4
-// 0.066580
-0x0885AD96
-// 0.089078
-0x0B66E9B3
-// 0.168307
-0x158B15E2
-// 0.196400
-0x19239FC7
+// 0.153783
+0x13AF2B40
+// 0.209919
+0x1ADE9F11
+// 0.155268
+0x13DFD01C
+// 0.248101
+0x1FC1C512
--- a/Testing/Patterns/DSP/Stats/StatsQ7/AbsMaxIndexes8_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ7/AbsMaxIndexes8_s16.txt
@ -1,10 +1,10 @@
 H
 4
-// 7
-0x0007
-// 7
-0x0007
-// 46
-0x002E
+// 1
+0x0001
+// 25
+0x0019
+// 25
+0x0019
 // 279
 0x0117
--- a/Testing/Patterns/DSP/Stats/StatsQ7/AbsMaxVals8_q7.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ7/AbsMaxVals8_q7.txt
@ -1,10 +1,10 @@
 B
 4
-// 0.807620
+// 0.619484
+0x4F
+// 0.802797
 0x67
-// 0.807620
+// 0.802797
 0x67
-// 0.984827
-0x7E
 // 0.900000
 0x73
--- a/Testing/Patterns/DSP/Stats/StatsQ7/AbsMinIndexes9_s16.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ7/AbsMinIndexes9_s16.txt
@ -1,10 +1,10 @@
 H
 4
-// 13
-0x000D
-// 13
-0x000D
-// 13
-0x000D
+// 8
+0x0008
+// 18
+0x0012
+// 18
+0x0012
 // 279
 0x0117
--- a/Testing/Patterns/DSP/Stats/StatsQ7/AbsMinVals9_q7.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ7/AbsMinVals9_q7.txt
@ -1,10 +1,10 @@
 B
 4
-// 0.008109
-0x01
-// 0.008109
-0x01
-// 0.008109
+// 0.008779
 0x01
+// 0.000193
+0x00
+// 0.000193
+0x00
 // 0.000000
 0x00
--- a/Testing/Patterns/DSP/Stats/StatsQ7/InputNew1_q7.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ7/InputNew1_q7.txt
--- a/Testing/Patterns/DSP/Stats/StatsQ7/InputNew2_q7.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ7/InputNew2_q7.txt
--- a/Testing/Patterns/DSP/Stats/StatsQ7/MSEVals10_q7.txt
+++ b/Testing/Patterns/DSP/Stats/StatsQ7/MSEVals10_q7.txt
@ -1,10 +1,10 @@
 B
 4
-// 0.191272
-0x18
-// 0.159547
-0x14
-// 0.205092
-0x1A
-// 0.257902
-0x21
+// 0.092336
+0x0C
+// 0.121537
+0x10
+// 0.162974
+0x15
+// 0.148534
+0x13
--- a/Testing/Source/Tests/StatsTestsF16.cpp
+++ b/Testing/Source/Tests/StatsTestsF16.cpp
@ -441,7 +441,28 @@ a double precision computation.
      ASSERT_REL_ERROR(ref,output,REL_ERROR);
    } 

+    void StatsTestsF16::test_mse_f16() // Runs arm_mse_f16 on inputA/inputB and checks the scalar result against ref[refOffset].
+    {
+        const float16_t *inpA  = inputA.ptr();
+        const float16_t *inpB  = inputB.ptr();
+
+        float16_t result;
+
+        float16_t *refp  = ref.ptr(); // reference values; the entry used is selected by refOffset (assigned in setUp)
+
+        float16_t *outp  = output.ptr();
+
+        arm_mse_f16(inpA,inpB,
+              inputA.nbSamples(), // block size comes from inputA only; NOTE(review): inputB presumably has the same length - confirm
+              &result);
+
+        outp[0] = result; // keep the computed value in the output buffer as well
+
+        ASSERT_SNR(result,refp[this->refOffset],(float16_t)SNR_THRESHOLD); // SNR check of result vs. reference, threshold SNR_THRESHOLD

+        ASSERT_REL_ERROR(result,refp[this->refOffset],(float16_t)REL_ERROR); // relative-error check, tolerance REL_ERROR
+
+    }
  
    void StatsTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
    {
@ -1032,6 +1053,58 @@ a double precision computation.
               refOffset = 2;
            }
            break;
+
+             case StatsTestsF16::TEST_MSE_F16_49:
+            {
+               inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,7);
+               inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,7);
+              
+               ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
+               
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MSE_F16_50:
+            {
+               inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,16);
+               inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,16);
+              
+               ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
+               
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MSE_F16_51:
+            {
+               inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,23);
+               inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,23);
+              
+               ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
+               
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            case StatsTestsF16::TEST_MSE_F16_52:
+            {
+               inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,100);
+               inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,100);
+              
+               ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
+               
+               output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
+
+               refOffset = 3;
+            }
+            break;
        }
        
    }
--- a/Testing/Source/Tests/StatsTestsF32.cpp
+++ b/Testing/Source/Tests/StatsTestsF32.cpp
@ -436,7 +436,30 @@ a double precision computation.
    } 

   
+    void StatsTestsF32::test_mse_f32() // Runs arm_mse_f32 on inputA/inputB and checks the scalar result against ref[refOffset].
+    {
+        const float32_t *inpA  = inputA.ptr();
+        const float32_t *inpB  = inputB.ptr();
+
+        float32_t result;
+
+        float32_t *refp  = ref.ptr(); // reference values; the entry used is selected by refOffset (assigned in setUp)
+
+        float32_t *outp  = output.ptr();
+
+        arm_mse_f32(inpA,inpB,
+              inputA.nbSamples(), // block size comes from inputA only; NOTE(review): inputB presumably has the same length - confirm
+              &result);
+
+        outp[0] = result; // keep the computed value in the output buffer as well
+
+        ASSERT_SNR(result,refp[this->refOffset],(float32_t)SNR_THRESHOLD); // SNR check of result vs. reference, threshold SNR_THRESHOLD
+
+        ASSERT_REL_ERROR(result,refp[this->refOffset],(float32_t)REL_ERROR); // relative-error check, tolerance REL_ERROR
+
+    }
  
+
    void StatsTestsF32::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
    {
        (void)paramsArgs;
@ -1027,6 +1050,58 @@ a double precision computation.
            }
            break;

+            case StatsTestsF32::TEST_MSE_F32_49:
+            {
+               inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,3);
+               inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,3);
+              
+               ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
+               
+               output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF32::TEST_MSE_F32_50:
+            {
+               inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,8);
+               inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,8);
+              
+               ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
+               
+               output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF32::TEST_MSE_F32_51:
+            {
+               inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,11);
+               inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,11);
+              
+               ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
+               
+               output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            case StatsTestsF32::TEST_MSE_F32_52:
+            {
+               inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,100);
+               inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,100);
+              
+               ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
+               
+               output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
+
+               refOffset = 3;
+            }
+            break;
+

        }
        
--- a/Testing/Source/Tests/StatsTestsF64.cpp
+++ b/Testing/Source/Tests/StatsTestsF64.cpp
@ -439,6 +439,29 @@ a double precision computation.

 */
  
+    void StatsTestsF64::test_mse_f64() // Runs arm_mse_f64 on inputA/inputB and checks the scalar result against ref[refOffset].
+    {
+        const float64_t *inpA  = inputA.ptr();
+        const float64_t *inpB  = inputB.ptr();
+
+        float64_t result;
+
+        float64_t *refp  = ref.ptr(); // reference values; the entry used is selected by refOffset (assigned in setUp)
+
+        float64_t *outp  = output.ptr();
+
+        arm_mse_f64(inpA,inpB,
+              inputA.nbSamples(), // block size comes from inputA only; NOTE(review): inputB presumably has the same length - confirm
+              &result);
+
+        outp[0] = result; // keep the computed value in the output buffer as well
+
+        ASSERT_SNR(result,refp[this->refOffset],(float64_t)SNR_THRESHOLD); // SNR check of result vs. reference, threshold SNR_THRESHOLD
+
+        ASSERT_REL_ERROR(result,refp[this->refOffset],(float64_t)REL_ERROR); // relative-error check, tolerance REL_ERROR
+
+    }
+
    void StatsTestsF64::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
    {
        (void)paramsArgs;
@ -1030,6 +1053,58 @@ a double precision computation.
            }
            break;

+            case StatsTestsF64::TEST_MSE_F64_49:
+            {
+               inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,2);
+               inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,2);
+              
+               ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
+               
+               output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsF64::TEST_MSE_F64_50:
+            {
+               inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,4);
+               inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,4);
+              
+               ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
+               
+               output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsF64::TEST_MSE_F64_51:
+            {
+               inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,5);
+               inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,5);
+              
+               ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
+               
+               output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            case StatsTestsF64::TEST_MSE_F64_52:
+            {
+               inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,100);
+               inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,100);
+              
+               ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
+               
+               output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
+
+               refOffset = 3;
+            }
+            break;
+

        }
        
--- a/Testing/Source/Tests/StatsTestsQ15.cpp
+++ b/Testing/Source/Tests/StatsTestsQ15.cpp
@ -6,6 +6,8 @@
 //#include <cstdio>

 #define SNR_THRESHOLD 50
+#define SNR_THRESHOLD_MSE 50
+
 /* 

 Reference patterns are generated with
@ -13,6 +15,8 @@ a double precision computation.

 */
 #define ABS_ERROR_Q15 ((q15_t)100)
+#define ABS_ERROR_Q15_MSE ((q15_t)100)
+
 #define ABS_ERROR_Q63 (1<<17)

    void StatsTestsQ15::test_max_q15()
@ -310,6 +314,29 @@ a double precision computation.
    }

  
+    void StatsTestsQ15::test_mse_q15() // Runs arm_mse_q15 on inputA/inputB and checks the scalar result against ref[refOffset].
+    {
+        const q15_t *inpA  = inputA.ptr();
+        const q15_t *inpB  = inputB.ptr();
+
+        q15_t result;
+
+        q15_t *refp  = ref.ptr(); // reference values; the entry used is selected by refOffset (assigned in setUp)
+
+        q15_t *outp  = output.ptr();
+
+        arm_mse_q15(inpA,inpB,
+              inputA.nbSamples(), // block size comes from inputA only; NOTE(review): inputB presumably has the same length - confirm
+              &result);
+
+        outp[0] = result; // keep the computed value in the output buffer as well
+
+        ASSERT_SNR(result,refp[this->refOffset],(float32_t)SNR_THRESHOLD_MSE); // SNR check using the MSE-specific threshold
+
+        ASSERT_NEAR_EQ(result,refp[this->refOffset],(q15_t)ABS_ERROR_Q15_MSE); // NOTE(review): presumably an absolute-difference check with tolerance ABS_ERROR_Q15_MSE - confirm
+
+    }
+
  
    void StatsTestsQ15::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
    {
@ -808,6 +835,58 @@ a double precision computation.
            }
            break;

+            case StatsTestsQ15::TEST_MSE_Q15_40:
+            {
+               inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,7);
+               inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,7);
+              
+               ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
+               
+               output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsQ15::TEST_MSE_Q15_41:
+            {
+               inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,16);
+               inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,16);
+              
+               ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
+               
+               output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsQ15::TEST_MSE_Q15_42:
+            {
+               inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,23);
+               inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,23);
+              
+               ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
+               
+               output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            case StatsTestsQ15::TEST_MSE_Q15_43:
+            {
+               inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,100);
+               inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,100);
+              
+               ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
+               
+               output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
+
+               refOffset = 3;
+            }
+            break;
+
          
        }
        
--- a/Testing/Source/Tests/StatsTestsQ31.cpp
+++ b/Testing/Source/Tests/StatsTestsQ31.cpp
@ -6,6 +6,8 @@
 //#include <cstdio>

 #define SNR_THRESHOLD 100
+#define SNR_THRESHOLD_MSE 100
+
 /* 

 Reference patterns are generated with
@ -13,6 +15,8 @@ a double precision computation.

 */
 #define ABS_ERROR_Q31 ((q31_t)(100))
+#define ABS_ERROR_Q31_MSE ((q31_t)(100))
+
 #define ABS_ERROR_Q63 ((q63_t)(1<<18))

    void StatsTestsQ31::test_max_q31()
@ -309,7 +313,30 @@ a double precision computation.

    }

+    void StatsTestsQ31::test_mse_q31() // Runs arm_mse_q31 on inputA/inputB and checks the scalar result against ref[refOffset].
+    {
+        const q31_t *inpA  = inputA.ptr();
+        const q31_t *inpB  = inputB.ptr();
+
+        q31_t result;
+
+        q31_t *refp  = ref.ptr(); // reference values; the entry used is selected by refOffset (assigned in setUp)
+
+        q31_t *outp  = output.ptr();
+
+        arm_mse_q31(inpA,inpB,
+              inputA.nbSamples(), // block size comes from inputA only; NOTE(review): inputB presumably has the same length - confirm
+              &result);
+
+        outp[0] = result; // keep the computed value in the output buffer as well
+
+        ASSERT_SNR(result,refp[this->refOffset],(float32_t)SNR_THRESHOLD_MSE); // SNR check using the MSE-specific threshold
+
+        ASSERT_NEAR_EQ(result,refp[this->refOffset],(q31_t)ABS_ERROR_Q31_MSE); // NOTE(review): presumably an absolute-difference check with tolerance ABS_ERROR_Q31_MSE - confirm
+
+    }
  
+
  
    void StatsTestsQ31::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
    {
@ -808,6 +835,58 @@ a double precision computation.
            }
            break;

+            case StatsTestsQ31::TEST_MSE_Q31_40:
+            {
+               inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,3);
+               inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,3);
+              
+               ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
+               
+               output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
+
+               refOffset = 0;
+            }
+            break;
+
+            case StatsTestsQ31::TEST_MSE_Q31_41:
+            {
+               inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,8);
+               inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,8);
+              
+               ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
+               
+               output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
+
+               refOffset = 1;
+            }
+            break;
+
+            case StatsTestsQ31::TEST_MSE_Q31_42:
+            {
+               inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,11);
+               inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,11);
+              
+               ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
+               
+               output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
+
+               refOffset = 2;
+            }
+            break;
+
+            case StatsTestsQ31::TEST_MSE_Q31_43:
+            {
+               inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,100);
+               inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,100);
+              
+               ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
+               
+               output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
+
+               refOffset = 3;
+            }
+            break;
+
          
        }
        
--- a/Testing/Source/Tests/StatsTestsQ7.cpp
+++ b/Testing/Source/Tests/StatsTestsQ7.cpp
@ -6,7 +6,7 @@
 //#include <cstdio>

 #define SNR_THRESHOLD 20
-#define SNR_THRESHOLD_MSE 14
+#define SNR_THRESHOLD_MSE 20

 /* 

--- a/Testing/desc.txt
+++ b/Testing/desc.txt
@ -16,6 +16,7 @@ group Root {

              Pattern INPUT1_F64_ID : Input1_f64.txt 
              Pattern INPUTNEW1_F64_ID : InputNew1_f64.txt 
+              Pattern INPUTNEW2_F64_ID : InputNew2_f64.txt 

              Pattern INPUT2_F64_ID : Input2_f64.txt 
              Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -52,6 +53,8 @@ group Root {
              Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt
              Pattern ABSMINVALS_F64_ID : AbsMinVals27_f64.txt

+              Pattern MSE_F64_ID : MSEVals28_f64.txt
+

              Output  OUT_F64_ID : Output
              Output  OUT_S16_ID : Index
@ -122,6 +125,11 @@ group Root {
                Test nb=2n   arm_absmin_no_idx_f64:test_absmin_no_idx_f64
                Test nb=2n+1 arm_absmin_no_idx_f64:test_absmin_no_idx_f64

+                Test nb=2    arm_mse_f64:test_mse_f64
+                Test nb=2n   arm_mse_f64:test_mse_f64
+                Test nb=2n+1 arm_mse_f64:test_mse_f64
+                Test long    arm_mse_f64:test_mse_f64
+

              }

@ -133,6 +141,7 @@ group Root {

              Pattern INPUT1_F32_ID : Input1_f32.txt 
              Pattern INPUTNEW1_F32_ID : InputNew1_f32.txt 
+              Pattern INPUTNEW2_F32_ID : InputNew2_f32.txt

              Pattern INPUT2_F32_ID : Input2_f32.txt 
              Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -169,6 +178,8 @@ group Root {
              Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt
              Pattern ABSMINVALS_F32_ID : AbsMinVals27_f32.txt

+              Pattern MSE_F32_ID : MSEVals28_f32.txt
+

              Output  OUT_F32_ID : Output
              Output  OUT_S16_ID : Index
@ -231,13 +242,18 @@ group Root {
                Test nb=4n   arm_min_no_idx_f32:test_min_no_idx_f32
                Test nb=4n+1 arm_min_no_idx_f32:test_min_no_idx_f32

-                Test nb=2    arm_absmax_no_idx_f32:test_absmax_no_idx_f32
-                Test nb=2n   arm_absmax_no_idx_f32:test_absmax_no_idx_f32
-                Test nb=2n+1 arm_absmax_no_idx_f32:test_absmax_no_idx_f32
+                Test nb=3    arm_absmax_no_idx_f32:test_absmax_no_idx_f32
+                Test nb=4n   arm_absmax_no_idx_f32:test_absmax_no_idx_f32
+                Test nb=4n+1 arm_absmax_no_idx_f32:test_absmax_no_idx_f32
+
+                Test nb=3    arm_absmin_no_idx_f32:test_absmin_no_idx_f32
+                Test nb=4n   arm_absmin_no_idx_f32:test_absmin_no_idx_f32
+                Test nb=4n+1 arm_absmin_no_idx_f32:test_absmin_no_idx_f32

-                Test nb=2    arm_absmin_no_idx_f32:test_absmin_no_idx_f32
-                Test nb=2n   arm_absmin_no_idx_f32:test_absmin_no_idx_f32
-                Test nb=2n+1 arm_absmin_no_idx_f32:test_absmin_no_idx_f32
+                Test nb=3    arm_mse_f32:test_mse_f32
+                Test nb=4n   arm_mse_f32:test_mse_f32
+                Test nb=4n+1 arm_mse_f32:test_mse_f32
+                Test long    arm_mse_f32:test_mse_f32


              }
@ -252,6 +268,7 @@ group Root {

              Pattern INPUT1_Q31_ID : Input1_q31.txt 
              Pattern INPUTNEW1_Q31_ID : InputNew1_q31.txt 
+              Pattern INPUTNEW2_Q31_ID : InputNew2_q31.txt 

              Pattern INPUT2_Q31_ID : Input2_q31.txt 
              Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -270,6 +287,8 @@ group Root {
              Pattern ABSMININDEXES_S16_ID : AbsMinIndexes9_s16.txt
              Pattern ABSMINVALS_Q31_ID : AbsMinVals9_q31.txt

+              Pattern MSE_Q31_ID : MSEVals10_q31.txt
+
              Output  OUT_Q31_ID : Output
              Output  OUT_Q63_ID : Output
              Output  OUT_S16_ID : Index
@ -320,13 +339,18 @@ group Root {
                Test nb=4n   arm_min_no_idx_q31:test_min_no_idx_q31
                Test nb=4n+1 arm_min_no_idx_q31:test_min_no_idx_q31

-                Test nb=2    arm_absmax_no_idx_q31:test_absmax_no_idx_q31
-                Test nb=2n   arm_absmax_no_idx_q31:test_absmax_no_idx_q31
-                Test nb=2n+1 arm_absmax_no_idx_q31:test_absmax_no_idx_q31
+                Test nb=3    arm_absmax_no_idx_q31:test_absmax_no_idx_q31
+                Test nb=4n   arm_absmax_no_idx_q31:test_absmax_no_idx_q31
+                Test nb=4n+1 arm_absmax_no_idx_q31:test_absmax_no_idx_q31
+
+                Test nb=3    arm_absmin_no_idx_q31:test_absmin_no_idx_q31
+                Test nb=4n   arm_absmin_no_idx_q31:test_absmin_no_idx_q31
+                Test nb=4n+1 arm_absmin_no_idx_q31:test_absmin_no_idx_q31

-                Test nb=2    arm_absmin_no_idx_q31:test_absmin_no_idx_q31
-                Test nb=2n   arm_absmin_no_idx_q31:test_absmin_no_idx_q31
-                Test nb=2n+1 arm_absmin_no_idx_q31:test_absmin_no_idx_q31
+                Test nb=3    arm_mse_q31:test_mse_q31
+                Test nb=4n   arm_mse_q31:test_mse_q31
+                Test nb=4n+1 arm_mse_q31:test_mse_q31
+                Test long    arm_mse_q31:test_mse_q31

              }

@ -338,6 +362,7 @@ group Root {

              Pattern INPUT1_Q15_ID : Input1_q15.txt 
              Pattern INPUTNEW1_Q15_ID : InputNew1_q15.txt
+              Pattern INPUTNEW2_Q15_ID : InputNew2_q15.txt

              Pattern INPUT2_Q15_ID : Input2_q15.txt 
              Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -356,7 +381,7 @@ group Root {
              Pattern ABSMININDEXES_S16_ID : AbsMinIndexes9_s16.txt
              Pattern ABSMINVALS_Q15_ID : AbsMinVals9_q15.txt

-
+              Pattern MSE_Q15_ID : MSEVals10_q15.txt

              Output  OUT_Q15_ID : Output
              Output  OUT_Q63_ID : Output
@ -392,29 +417,34 @@ group Root {
                Test nb=8n   arm_var_q15:test_var_q15
                Test nb=8n+1 arm_var_q15:test_var_q15

-                Test nb=3    arm_absmax_q15:test_absmax_q15
-                Test nb=4n   arm_absmax_q15:test_absmax_q15
-                Test nb=4n+1 arm_absmax_q15:test_absmax_q15
+                Test nb=7    arm_absmax_q15:test_absmax_q15
+                Test nb=8n   arm_absmax_q15:test_absmax_q15
+                Test nb=8n+1 arm_absmax_q15:test_absmax_q15
+
+                Test nb=7    arm_absmin_q15:test_absmin_q15
+                Test nb=8n   arm_absmin_q15:test_absmin_q15
+                Test nb=8n+1 arm_absmin_q15:test_absmin_q15

-                Test nb=3    arm_absmin_q15:test_absmin_q15
-                Test nb=4n   arm_absmin_q15:test_absmin_q15
-                Test nb=4n+1 arm_absmin_q15:test_absmin_q15
+                Test nb=7    arm_max_no_idx_q15:test_max_no_idx_q15
+                Test nb=8n   arm_max_no_idx_q15:test_max_no_idx_q15
+                Test nb=8n+1 arm_max_no_idx_q15:test_max_no_idx_q15

-                Test nb=3    arm_max_no_idx_q15:test_max_no_idx_q15
-                Test nb=4n   arm_max_no_idx_q15:test_max_no_idx_q15
-                Test nb=4n+1 arm_max_no_idx_q15:test_max_no_idx_q15
+                Test nb=7    arm_min_no_idx_q15:test_min_no_idx_q15
+                Test nb=8n   arm_min_no_idx_q15:test_min_no_idx_q15
+                Test nb=8n+1 arm_min_no_idx_q15:test_min_no_idx_q15

-                Test nb=3    arm_min_no_idx_q15:test_min_no_idx_q15
-                Test nb=4n   arm_min_no_idx_q15:test_min_no_idx_q15
-                Test nb=4n+1 arm_min_no_idx_q15:test_min_no_idx_q15
+                Test nb=7    arm_absmax_no_idx_q15:test_absmax_no_idx_q15
+                Test nb=8n   arm_absmax_no_idx_q15:test_absmax_no_idx_q15
+                Test nb=8n+1 arm_absmax_no_idx_q15:test_absmax_no_idx_q15

-                Test nb=2    arm_absmax_no_idx_q15:test_absmax_no_idx_q15
-                Test nb=2n   arm_absmax_no_idx_q15:test_absmax_no_idx_q15
-                Test nb=2n+1 arm_absmax_no_idx_q15:test_absmax_no_idx_q15
+                Test nb=7    arm_absmin_no_idx_q15:test_absmin_no_idx_q15
+                Test nb=8n   arm_absmin_no_idx_q15:test_absmin_no_idx_q15
+                Test nb=8n+1 arm_absmin_no_idx_q15:test_absmin_no_idx_q15

-                Test nb=2    arm_absmin_no_idx_q15:test_absmin_no_idx_q15
-                Test nb=2n   arm_absmin_no_idx_q15:test_absmin_no_idx_q15
-                Test nb=2n+1 arm_absmin_no_idx_q15:test_absmin_no_idx_q15
+                Test nb=7    arm_mse_q15:test_mse_q15
+                Test nb=8n   arm_mse_q15:test_mse_q15
+                Test nb=8n+1 arm_mse_q15:test_mse_q15
+                Test long    arm_mse_q15:test_mse_q15

              }

@ -479,32 +509,32 @@ group Root {
                Test big index  arm_max_q7:test_max_q7
                Test big index  arm_min_q7:test_min_q7

-                Test nb=3    arm_absmax_q7:test_absmax_q7
-                Test nb=4n   arm_absmax_q7:test_absmax_q7
-                Test nb=4n+1 arm_absmax_q7:test_absmax_q7
+                Test nb=15    arm_absmax_q7:test_absmax_q7
+                Test nb=16n   arm_absmax_q7:test_absmax_q7
+                Test nb=16n+1 arm_absmax_q7:test_absmax_q7

-                Test nb=3    arm_absmin_q7:test_absmin_q7
-                Test nb=4n   arm_absmin_q7:test_absmin_q7
-                Test nb=4n+1 arm_absmin_q7:test_absmin_q7
+                Test nb=15    arm_absmin_q7:test_absmin_q7
+                Test nb=16n   arm_absmin_q7:test_absmin_q7
+                Test nb=16n+1 arm_absmin_q7:test_absmin_q7

                Test big index  arm_absmax_q7:test_absmax_q7
                Test big index  arm_absmin_q7:test_absmin_q7

-                Test nb=3    arm_max_no_idx_q7:test_max_no_idx_q7
-                Test nb=4n   arm_max_no_idx_q7:test_max_no_idx_q7
-                Test nb=4n+1 arm_max_no_idx_q7:test_max_no_idx_q7
+                Test nb=15    arm_max_no_idx_q7:test_max_no_idx_q7
+                Test nb=16n   arm_max_no_idx_q7:test_max_no_idx_q7
+                Test nb=16n+1 arm_max_no_idx_q7:test_max_no_idx_q7

-                Test nb=3    arm_min_no_idx_q7:test_min_no_idx_q7
-                Test nb=4n   arm_min_no_idx_q7:test_min_no_idx_q7
-                Test nb=4n+1 arm_min_no_idx_q7:test_min_no_idx_q7
+                Test nb=15    arm_min_no_idx_q7:test_min_no_idx_q7
+                Test nb=16n   arm_min_no_idx_q7:test_min_no_idx_q7
+                Test nb=16n+1 arm_min_no_idx_q7:test_min_no_idx_q7

-                Test nb=2    arm_absmax_no_idx_q7:test_absmax_no_idx_q7
-                Test nb=2n   arm_absmax_no_idx_q7:test_absmax_no_idx_q7
-                Test nb=2n+1 arm_absmax_no_idx_q7:test_absmax_no_idx_q7
+                Test nb=15    arm_absmax_no_idx_q7:test_absmax_no_idx_q7
+                Test nb=16n   arm_absmax_no_idx_q7:test_absmax_no_idx_q7
+                Test nb=16n+1 arm_absmax_no_idx_q7:test_absmax_no_idx_q7

-                Test nb=2    arm_absmin_no_idx_q7:test_absmin_no_idx_q7
-                Test nb=2n   arm_absmin_no_idx_q7:test_absmin_no_idx_q7
-                Test nb=2n+1 arm_absmin_no_idx_q7:test_absmin_no_idx_q7
+                Test nb=15    arm_absmin_no_idx_q7:test_absmin_no_idx_q7
+                Test nb=16n   arm_absmin_no_idx_q7:test_absmin_no_idx_q7
+                Test nb=16n+1 arm_absmin_no_idx_q7:test_absmin_no_idx_q7

                Test nb=15    arm_mse_q7:test_mse_q7
                Test nb=16n   arm_mse_q7:test_mse_q7
--- a/Testing/desc_f16.txt
+++ b/Testing/desc_f16.txt
@ -15,6 +15,7 @@ group Root {

              Pattern INPUT1_F16_ID : Input1_f16.txt 
              Pattern INPUTNEW1_F16_ID : InputNew1_f16.txt 
+              Pattern INPUTNEW2_F16_ID : InputNew2_f16.txt 

              Pattern INPUT2_F16_ID : Input2_f16.txt 
              Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -51,6 +52,8 @@ group Root {
              Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt
              Pattern ABSMINVALS_F16_ID : AbsMinVals27_f16.txt

+              Pattern MSE_F16_ID : MSEVals28_f16.txt
+
              Output  OUT_F16_ID : Output
              Output  OUT_S16_ID : Index
              Output  TMP_F16_ID : Temp
@ -100,25 +103,30 @@ group Root {

                Test stability  arm_std_f16:test_std_stability_f16

-                Test nb=3    arm_absmax_f16:test_absmax_f16
-                Test nb=4n   arm_absmax_f16:test_absmax_f16
-                Test nb=4n+1 arm_absmax_f16:test_absmax_f16
+                Test nb=7    arm_absmax_f16:test_absmax_f16
+                Test nb=8n   arm_absmax_f16:test_absmax_f16
+                Test nb=8n+1 arm_absmax_f16:test_absmax_f16

-                Test nb=3    arm_absmin_f16:test_absmin_f16
-                Test nb=4n   arm_absmin_f16:test_absmin_f16
-                Test nb=4n+1 arm_absmin_f16:test_absmin_f16
+                Test nb=7    arm_absmin_f16:test_absmin_f16
+                Test nb=8n   arm_absmin_f16:test_absmin_f16
+                Test nb=8n+1 arm_absmin_f16:test_absmin_f16

                Test nb=7    arm_min_no_idx_f16:test_min_no_idx_f16
                Test nb=8n   arm_min_no_idx_f16:test_min_no_idx_f16
                Test nb=8n+1 arm_min_no_idx_f16:test_min_no_idx_f16

-                Test nb=2    arm_absmax_no_idx_f16:test_absmax_no_idx_f16
-                Test nb=2n   arm_absmax_no_idx_f16:test_absmax_no_idx_f16
-                Test nb=2n+1 arm_absmax_no_idx_f16:test_absmax_no_idx_f16
+                Test nb=7    arm_absmax_no_idx_f16:test_absmax_no_idx_f16
+                Test nb=8n   arm_absmax_no_idx_f16:test_absmax_no_idx_f16
+                Test nb=8n+1 arm_absmax_no_idx_f16:test_absmax_no_idx_f16
+
+                Test nb=7    arm_absmin_no_idx_f16:test_absmin_no_idx_f16
+                Test nb=8n   arm_absmin_no_idx_f16:test_absmin_no_idx_f16
+                Test nb=8n+1 arm_absmin_no_idx_f16:test_absmin_no_idx_f16

-                Test nb=2    arm_absmin_no_idx_f16:test_absmin_no_idx_f16
-                Test nb=2n   arm_absmin_no_idx_f16:test_absmin_no_idx_f16
-                Test nb=2n+1 arm_absmin_no_idx_f16:test_absmin_no_idx_f16
+                Test nb=7    arm_mse_f16:test_mse_f16
+                Test nb=8n   arm_mse_f16:test_mse_f16
+                Test nb=8n+1 arm_mse_f16:test_mse_f16
+                Test long    arm_mse_f16:test_mse_f16
              }
           }
        }