Prepare for merge

4 years ago · 9a1a7d284f
parent a973e9ed37
commit 9a1a7d284f
13 changed files with 1031 additions and 12 deletions
--- a/Include/dsp/basic_math_functions.h
+++ b/Include/dsp/basic_math_functions.h
@ -1,8 +1,8 @@
 /******************************************************************************
 * @file     basic_math_functions.h
 * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
+ * @version  V1.11.1
+ * @date     14 July 2022
 * Target Processor: Cortex-M and Cortex-A cores
 ******************************************************************************/
 /*
--- a/Include/dsp/basic_math_functions_f16.h
+++ b/Include/dsp/basic_math_functions_f16.h
@ -1,8 +1,8 @@
 /******************************************************************************
 * @file     basic_math_functions_f16.h
 * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
+ * @version  V1.10.1
+ * @date     14 July 2022
 * Target Processor: Cortex-M and Cortex-A cores
 ******************************************************************************/
 /*
@ -159,6 +159,8 @@ void arm_clip_f16(const float16_t * pSrc,
  float16_t high, 
  uint32_t numSamples);

+
+
 #endif /* defined(ARM_FLOAT16_SUPPORTED)*/

 #ifdef   __cplusplus
--- a/Include/dsp/statistics_functions.h
+++ b/Include/dsp/statistics_functions.h
@ -1,8 +1,8 @@
 /******************************************************************************
 * @file     statistics_functions.h
 * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
+ * @version  V1.10.1
+ * @date     14 July 2022
 * Target Processor: Cortex-M and Cortex-A cores
 ******************************************************************************/
 /*
@ -970,6 +970,67 @@ void arm_mse_f64(
        uint32_t blockSize,
        float64_t * pResult);

+
+/**
+ * @brief  Sum of all the elements of a float32_t vector.
+ * @param[in]  pSrc       points to the input vector
+ * @param[in]  blockSize  is the number of samples to process
+ * @param[out] pResult    points to where the sum is stored
+ */
+
+void arm_accumulate_f32(
+const float32_t * pSrc,
+      uint32_t blockSize,
+      float32_t * pResult);
+
+/**
+ * @brief  Sum of all the elements of a float64_t vector.
+ * @param[in]  pSrc       points to the input vector
+ * @param[in]  blockSize  is the number of samples to process
+ * @param[out] pResult    points to where the sum is stored
+ */
+
+void arm_accumulate_f64(
+const float64_t * pSrc,
+      uint32_t blockSize,
+      float64_t * pResult);
+
+/**
+ * @brief  Sum of all the elements of a Q7 vector.
+ * @param[in]  pSrc       points to the input vector
+ * @param[in]  blockSize  is the number of samples to process
+ * @param[out] pResult    points to where the sum, narrowed to q7_t, is stored
+ */
+
+void arm_accumulate_q7(
+const q7_t * pSrc,
+      uint32_t blockSize,
+      q7_t * pResult);
+
+/**
+ * @brief  Sum of all the elements of a Q15 vector.
+ * @param[in]  pSrc       points to the input vector
+ * @param[in]  blockSize  is the number of samples to process
+ * @param[out] pResult    points to where the sum, narrowed to q15_t, is stored
+ */
+
+void arm_accumulate_q15(
+const q15_t * pSrc,
+      uint32_t blockSize,
+      q15_t * pResult);
+
+/**
+ * @brief  Sum of all the elements of a Q31 vector.
+ * @param[in]  pSrc       points to the input vector
+ * @param[in]  blockSize  is the number of samples to process
+ * @param[out] pResult    points to where the sum, narrowed to q31_t, is stored
+ */
+
+void arm_accumulate_q31(
+const q31_t * pSrc,
+      uint32_t blockSize,
+      q31_t * pResult);
+
 #ifdef   __cplusplus
 }
 #endif
--- a/Include/dsp/statistics_functions_f16.h
+++ b/Include/dsp/statistics_functions_f16.h
@ -1,8 +1,8 @@
 /******************************************************************************
 * @file     statistics_functions_f16.h
 * @brief    Public header file for CMSIS DSP Library
- * @version  V1.10.0
- * @date     08 July 2021
+ * @version  V1.10.1
+ * @date     14 July 2022
 * Target Processor: Cortex-M and Cortex-A cores
 ******************************************************************************/
 /*
@ -258,6 +258,19 @@ void arm_mse_f16(
        uint32_t blockSize,
        float16_t * pResult);

+
+/**
+  * @brief  Sum of all the elements of a float16_t vector.
+  * @param[in]  pSrc       points to the input vector
+  * @param[in]  blockSize  is the number of samples to process
+  * @param[out] pResult    points to where the sum is stored
+  */
+ void arm_accumulate_f16(
+ const float16_t * pSrc,
+       uint32_t blockSize,
+       float16_t * pResult);
+
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
--- a/Source/BasicMathFunctions/BasicMathFunctionsF16.c
+++ b/Source/BasicMathFunctions/BasicMathFunctionsF16.c
@ -35,3 +35,4 @@
 #include "arm_scale_f16.c"
 #include "arm_sub_f16.c"
 #include "arm_clip_f16.c"
+
--- a/Source/StatisticsFunctions/StatisticsFunctions.c
+++ b/Source/StatisticsFunctions/StatisticsFunctions.c
@ -3,8 +3,8 @@
 * Title:        StatisticsFunctions.c
 * Description:  Combination of all statistics function source files.
 *
- * $Date:        16. March 2020
- * $Revision:    V1.1.0
+ * $Date:        14 July 2022
+ * $Revision:    V1.1.1
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
@ -98,3 +98,9 @@
 #include "arm_mse_q31.c"
 #include "arm_mse_f32.c"
 #include "arm_mse_f64.c"
+#include "arm_accumulate_f32.c"
+#include "arm_accumulate_f64.c"
+#include "arm_accumulate_q7.c"
+#include "arm_accumulate_q15.c"
+#include "arm_accumulate_q31.c"
+
--- a/Source/StatisticsFunctions/StatisticsFunctionsF16.c
+++ b/Source/StatisticsFunctions/StatisticsFunctionsF16.c
@ -3,8 +3,8 @@
 * Title:        StatisticsFunctions.c
 * Description:  Combination of all statistics function source files.
 *
- * $Date:        16. March 2020
- * $Revision:    V1.1.0
+ * $Date:        14 July 2022
+ * $Revision:    V1.1.1
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
@ -44,3 +44,4 @@
 #include "arm_absmax_no_idx_f16.c"
 #include "arm_absmin_no_idx_f16.c"
 #include "arm_mse_f16.c"
+#include "arm_accumulate_f16.c"
--- a/Source/StatisticsFunctions/arm_accumulate_f16.c
+++ b/Source/StatisticsFunctions/arm_accumulate_f16.c
@ -0,0 +1,152 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_accumulate_f16.c
+ * Description:  accumulation value of a floating-point vector
+ *
+ * $Date:        14 July 2022
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup Accumulation Accumulation functions
+ 
+ Calculates the accumulation (sum) of the input vector, i.e. the addition of all the elements in the vector.
+ The underlying algorithm is:
+ 
+ <pre>
+ Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]);
+ </pre>
+ 
+ There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ @addtogroup Accumulation
+ @{
+ */
+
+/**
+ @brief         accumulate value of a floating-point vector.
+ @param[in]     pSrc       points to the input vector.
+ @param[in]     blockSize  number of samples in input vector.
+ @param[out]    pResult    sum value returned here.
+ @return        none
+ */
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+void arm_accumulate_f16(
+                        const float16_t * pSrc,
+                        uint32_t blockSize,
+                        float16_t * pResult)
+{
+  int32_t  blkCnt;           /* Samples still to process; signed because the "-= 8" below can take it to zero or below */
+  f16x8_t vecSrc;            /* Group of up to 8 input samples */
+  f16x8_t sumVec = vdupq_n_f16(0.0f16);   /* 8 per-lane partial sums, all starting at zero */
+  
+  blkCnt = blockSize;
+  do {
+    mve_pred16_t p = vctp16q(blkCnt);   /* Tail predicate derived from the remaining sample count */
+    
+    vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p);   /* Predicated load ("_z" form) */
+    sumVec = vaddq_m_f16(sumVec, sumVec, vecSrc, p);      /* Predicated lane-wise add; sumVec is also the "inactive" operand */
+    
+    blkCnt -= 8;
+    pSrc += 8;
+  }
+  while (blkCnt > 0);   /* do/while: the body is executed once even when blockSize is 0 */
+  /* vecAddAcrossF16Mve is not defined in this file; presumably it adds the 8 lanes together - confirm */
+  *pResult = vecAddAcrossF16Mve(sumVec);
+}
+
+
+#else
+
+void arm_accumulate_f16(
+                        const float16_t * pSrc,
+                        uint32_t blockSize,
+                        float16_t * pResult)
+{
+  /*
+   * Scalar version: the samples are added one after the other, in input
+   * order, into a float16_t running total that is finally written to
+   * *pResult.
+   */
+  float16_t acc = 0.0f;                          /* Running total, starts at zero */
+  uint32_t remaining;                            /* Samples left for the final loop */
+  
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+  
+  /* Unrolled pass: one iteration per complete group of 4 samples */
+  uint32_t groups;
+  
+  for (groups = blockSize >> 2U; groups > 0U; groups--)
+  {
+    /* Four consecutive samples per iteration; they are still added
+       strictly one at a time, in input order */
+    acc += (_Float16)*pSrc++;
+    
+    acc += (_Float16)*pSrc++;
+    
+    acc += (_Float16)*pSrc++;
+    
+    acc += (_Float16)*pSrc++;
+  }
+  
+  /* 0 to 3 samples did not fill a group: leave them to the loop below */
+  remaining = blockSize & 0x3U;
+  
+#else
+  
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+  
+#endif /* defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */
+  
+  /* One sample per iteration */
+  for (; remaining > 0U; remaining--)
+  {
+    /* acc = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    acc += (_Float16)*pSrc++;
+  }
+  
+  /* Hand the total back to the caller */
+  *pResult = acc;
+}
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+ @} end of Accumulation group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Source/StatisticsFunctions/arm_accumulate_f32.c
+++ b/Source/StatisticsFunctions/arm_accumulate_f32.c
@ -0,0 +1,198 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_accumulate_f32.c
+ * Description:  Sum value of a floating-point vector
+ *
+ * $Date:        14 July 2022
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+ @ingroup groupStats
+ */
+
+
+/**
+ @addtogroup Accumulation
+ @{
+ */
+
+/**
+ @brief         Accumulation value of a floating-point vector.
+ @param[in]     pSrc       points to the input vector.
+ @param[in]     blockSize  number of samples in input vector.
+ @param[out]    pResult    sum value returned here.
+ @return        none
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+void arm_accumulate_f32(
+                        const float32_t * pSrc,
+                        uint32_t blockSize,
+                        float32_t * pResult)
+{
+  uint32_t  blkCnt;           /* Loop counter, reused by both loops */
+  f32x4_t vecSrc;             /* Four input samples loaded per iteration */
+  f32x4_t sumVec = vdupq_n_f32(0.0f);   /* Four per-lane partial sums, starting at zero */
+  float32_t sum = 0.0f;       /* Scalar total */
+  
+  /* Vector loop: one iteration per complete group of 4 samples */
+  blkCnt = blockSize >> 2U;
+  while (blkCnt > 0U)
+  {
+    vecSrc = vldrwq_f32(pSrc);
+    sumVec = vaddq_f32(sumVec, vecSrc);   /* Lane-wise add into the partial sums */
+    
+    blkCnt --;
+    pSrc += 4;
+  }
+  
+  sum = vecAddAcrossF32Mve(sumVec);   /* Helper not defined in this file; presumably adds the 4 lanes together - confirm */
+  
+  /* Tail: the 0 to 3 samples left over by the vector loop */
+  blkCnt = blockSize & 0x3;
+  
+  while (blkCnt > 0U)
+  {
+    /* Add the next leftover sample to the scalar total */
+    sum += *pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  *pResult = sum;   /* Store the result to the destination */
+}
+
+
+#else
+#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
+void arm_accumulate_f32(
+                        const float32_t * pSrc,
+                        uint32_t blockSize,
+                        float32_t * pResult)
+{
+  float32_t sum = 0.0f;                          /* Scalar total */
+  float32x4_t sumV = vdupq_n_f32(0.0f);                          /* Four per-lane partial sums, starting at zero */
+  float32x2_t sumV2;                             /* Pairwise-added halves of sumV */
+  
+  uint32_t blkCnt;                               /* Loop counter, reused by both loops */
+  
+  float32x4_t inV;                               /* Four input samples loaded per iteration */
+  
+  blkCnt = blockSize >> 2U;
+  
+  /* Vector loop: one iteration per complete group of 4 samples.
+   ** The second loop below adds the remaining 0 to 3 samples. */
+  while (blkCnt > 0U)
+  {
+    /* sumV[i] += A[4k + i] for i = 0..3 */
+    inV = vld1q_f32(pSrc);
+    sumV = vaddq_f32(sumV, inV);
+    
+    pSrc += 4;
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+  
+  sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));   /* Pairwise add of the low and high halves */
+  sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);     /* Add the two remaining lanes */
+  
+  /* If blockSize is not a multiple of 4, add the leftover samples here,
+   ** one at a time. */
+  blkCnt = blockSize & 3;
+  
+  while (blkCnt > 0U)
+  {
+    /* Add the next leftover sample to the scalar total */
+    sum += *pSrc++;
+    
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+  
+  /* sum now holds A[0] + A[1] + ... + A[blockSize-1], with the vector part combined lane-wise first */
+  /* Store the result to the destination */
+  *pResult = sum;
+}
+#else
+void arm_accumulate_f32(
+                        const float32_t * pSrc,
+                        uint32_t blockSize,
+                        float32_t * pResult)
+{
+  /*
+   * Scalar version: the samples are added one after the other, in input
+   * order, into a float32_t running total that is finally written to
+   * *pResult.
+   */
+  float32_t acc = 0.0f;                          /* Running total, starts at zero */
+  uint32_t remaining;                            /* Samples left for the final loop */
+  
+#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
+  
+  /* Unrolled pass: one iteration per complete group of 4 samples */
+  uint32_t groups;
+  
+  for (groups = blockSize >> 2U; groups > 0U; groups--)
+  {
+    /* Four consecutive samples per iteration; they are still added
+       strictly one at a time, in input order */
+    acc += *pSrc++;
+    
+    acc += *pSrc++;
+    
+    acc += *pSrc++;
+    
+    acc += *pSrc++;
+  }
+  
+  /* 0 to 3 samples did not fill a group: leave them to the loop below */
+  remaining = blockSize & 0x3U;
+  
+#else
+  
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+  
+#endif /* defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */
+  
+  /* One sample per iteration */
+  for (; remaining > 0U; remaining--)
+  {
+    /* acc = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    acc += *pSrc++;
+  }
+  
+  /* Hand the total back to the caller */
+  *pResult = acc;
+}
+#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE) */
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+ @} end of Accumulation group
+ */
--- a/Source/StatisticsFunctions/arm_accumulate_f64.c
+++ b/Source/StatisticsFunctions/arm_accumulate_f64.c
@ -0,0 +1,127 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_accumulate_f64.c
+ * Description:  Accumulation value of a floating-point vector
+ *
+ * $Date:        14 July 2022
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+ @ingroup groupStats
+ */
+
+
+/**
+ @addtogroup Accumulation
+ @{
+ */
+
+/**
+ @brief         Accumulation value of a floating-point vector.
+ @param[in]     pSrc       points to the input vector.
+ @param[in]     blockSize  number of samples in input vector.
+ @param[out]    pResult    sum value returned here.
+ @return        none
+ */
+#if defined(ARM_MATH_NEON) && defined(__aarch64__)
+void arm_accumulate_f64(
+                        const float64_t * pSrc,
+                        uint32_t blockSize,
+                        float64_t * pResult)
+{
+  uint32_t blkCnt;                               /* Loop counter, reused by both loops */
+  /* Neon variant: sums the input two samples at a time, then the odd tail.
+     The 64-bit float vector intrinsics are AArch64-only, hence the guard above. */
+  float64x2_t vSum = vdupq_n_f64(0.0);           /* Two per-lane partial sums, starting at zero */
+  float64x2_t afterLoad ;                        /* Two samples just loaded */
+  
+  float64_t sum = 0.;                            /* Scalar total */
+  
+  /* Vector loop: one iteration per complete pair of samples */
+  blkCnt = blockSize >> 1U;
+  
+  
+  while (blkCnt > 0U)
+  {
+    /* vSum[i] += A[2k + i] for i = 0, 1 */
+    
+    afterLoad = vld1q_f64(pSrc);
+    vSum = vaddq_f64(vSum, afterLoad);
+    
+    /* Decrement loop counter */
+    blkCnt--;
+    
+    pSrc += 2;
+  }
+  sum = vaddvq_f64(vSum);                        /* Add the two lanes together */
+  
+  /* Tail: at most one sample is left, when blockSize is odd */
+  blkCnt = blockSize & 1 ;
+  
+  while (blkCnt > 0U)
+  {
+    /* Add the leftover sample to the scalar total */
+    sum += *pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1])  */
+  /* Store result to destination */
+  *pResult = sum;
+}
+#else
+void arm_accumulate_f64(
+                        const float64_t * pSrc,
+                        uint32_t blockSize,
+                        float64_t *  pResult)
+{
+  uint32_t blkCnt;                               /* Loop counter */
+  float64_t sum = 0.;                            /* Running total, starts at zero */
+  
+  /* Portable variant: every sample is added in input order */
+  blkCnt = blockSize;
+  
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1])  */
+  /* Store result to destination */
+  *pResult = sum;
+}
+
+#endif /* defined(ARM_MATH_NEON) && defined(__aarch64__) */
+
+
+/**
+ @} end of Accumulation group
+ */
--- a/Source/StatisticsFunctions/arm_accumulate_q15.c
+++ b/Source/StatisticsFunctions/arm_accumulate_q15.c
@ -0,0 +1,156 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_accumulate_q15.c
+ * Description:  Accumulation value of a Q15 vector
+ *
+ * $Date:        14 July 2022
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup Accumulation
+ @{
+ */
+
+/**
+ @brief         Accumulation value of a Q15 vector.
+ @param[in]     pSrc       points to the input vector
+ @param[in]     blockSize  number of samples in input vector
+ @param[out]    pResult    sum value returned here
+ @return        none
+ 
+ @par           Scaling and Overflow Behavior
+ The function is implemented using a 32-bit internal accumulator.
+ The input is represented in 1.15 format and is accumulated in a 32-bit
+ accumulator in 17.15 format.
+ The accumulator cannot overflow as long as blockSize does not exceed
+ 2^16 samples, and the full precision of intermediate result is preserved.
+ Finally, the accumulator is narrowed to 1.15 format without saturation,
+ so a sum outside the 1.15 range is not preserved in the result.
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+void arm_accumulate_q15(
+                        const q15_t * pSrc,
+                        uint32_t blockSize,
+                        q15_t * pResult)
+{
+  uint32_t  blkCnt;           /* Loop counter, reused by both loops */
+  q15x8_t  vecSrc;            /* Eight input samples loaded per iteration */
+  q31_t     sum = 0L;         /* Accumulator for the whole vector */
+  
+  /* Vector loop: one iteration per complete group of 8 samples */
+  blkCnt = blockSize >> 3U;
+  while (blkCnt > 0U)
+  {
+    vecSrc = vldrhq_s16(pSrc);
+    /*
+     * Add every lane of vecSrc to the running scalar sum
+     */
+    sum = vaddvaq(sum, vecSrc);
+    
+    blkCnt--;
+    pSrc += 8;
+  }
+  
+  /* Tail: the 0 to 7 samples left over by the vector loop */
+  blkCnt = blockSize & 0x7;
+  
+  while (blkCnt > 0U)
+  {
+    /* Add the next leftover sample to the accumulator */
+    sum += *pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+  /* Store the result; the assignment narrows the q31_t sum to q15_t with no saturation step */
+  *pResult = sum;
+}
+#else
+void arm_accumulate_q15(
+                        const q15_t * pSrc,
+                        uint32_t blockSize,
+                        q15_t * pResult)
+{
+  uint32_t blkCnt;                               /* Loop counter, reused by both loops */
+  q31_t sum = 0;                                 /* Accumulator for the whole vector */
+  
+#if defined (ARM_MATH_LOOPUNROLL)
+  q31_t in;                                      /* Word returned by read_q15x2_ia; its two 16-bit halves are added separately */
+#endif
+  
+#if defined (ARM_MATH_LOOPUNROLL)
+  
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+  
+  while (blkCnt > 0U)
+  {
+    /* Low half via a narrowing cast rather than "(in << 16) >> 16": left-shifting a signed value is undefined behaviour */
+    in = read_q15x2_ia (&pSrc);
+    sum += (q15_t) in;
+    sum +=  (in >> 16U);
+    
+    in = read_q15x2_ia (&pSrc);
+    sum += (q15_t) in;
+    sum +=  (in >> 16U);
+    
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+  
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+  
+#else
+  
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+  
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+  
+  while (blkCnt > 0U)
+  {
+    /* Add the next sample to the accumulator */
+    sum += *pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+  /* Store result to destination: explicit narrowing to q15_t, no saturation */
+  *pResult = (q15_t) sum;
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+ @} end of Accumulation group
+ */
--- a/Source/StatisticsFunctions/arm_accumulate_q31.c
+++ b/Source/StatisticsFunctions/arm_accumulate_q31.c
@ -0,0 +1,149 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_accumulate_q31.c
+ * Description:  Accumulation value of a Q31 vector
+ *
+ * $Date:        14 July 2022
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup Accumulation
+ @{
+ */
+
+/**
+ @brief         Accumulation value of a Q31 vector.
+ @param[in]     pSrc       points to the input vector
+ @param[in]     blockSize  number of samples in input vector
+ @param[out]    pResult    sum value returned here
+ @return        none
+ 
+ @par           Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.31 format and is accumulated in a 64-bit
+ accumulator in 33.31 format.
+ There is no risk of internal overflow with this approach, and the
+ full precision of intermediate result is preserved.
+ Finally, the accumulator is truncated to yield a result of 1.31 format.
+ */
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+void arm_accumulate_q31(
+                        const q31_t * pSrc,
+                        uint32_t blockSize,
+                        q31_t * pResult)
+{
+  uint32_t  blkCnt;           /* Loop counter, reused by both loops */
+  q31x4_t vecSrc;             /* Four input samples loaded per iteration */
+  q63_t     sum = 0LL;        /* Accumulator for the whole vector */
+  
+  
+  /* Vector loop: one iteration per complete group of 4 samples */
+  blkCnt = blockSize >> 2U;
+  while (blkCnt > 0U)
+  {
+    
+    vecSrc = vldrwq_s32(pSrc);
+    /*
+     * Add every lane of vecSrc to the running scalar sum
+     */
+    sum = vaddlvaq(sum, vecSrc);
+    
+    blkCnt --;
+    pSrc += 4;
+  }
+  
+  /* Tail: the 0 to 3 samples left over by the vector loop */
+  blkCnt = blockSize & 0x3;
+  
+  while (blkCnt > 0U)
+  {
+    /* Add the next leftover sample to the accumulator */
+    sum += *pSrc++;
+    blkCnt --;
+  }
+  
+  *pResult = sum;   /* The assignment narrows the q63_t sum to q31_t with no saturation step */
+}
+#else
+void arm_accumulate_q31(
+                        const q31_t * pSrc,
+                        uint32_t blockSize,
+                        q31_t * pResult)
+{
+  /*
+   * Scalar version: the samples are added one after the other into a
+   * q63_t running total; the assignment to *pResult at the end narrows
+   * that total to q31_t.
+   */
+  q63_t acc = 0;                                 /* Running total, starts at zero */
+  uint32_t remaining;                            /* Samples left for the final loop */
+  
+#if defined (ARM_MATH_LOOPUNROLL)
+  
+  /* Unrolled pass: one iteration per complete group of 4 samples */
+  uint32_t groups;
+  
+  for (groups = blockSize >> 2U; groups > 0U; groups--)
+  {
+    /* Four consecutive samples per iteration, added one at a time
+       in input order */
+    acc += *pSrc++;
+    
+    acc += *pSrc++;
+    
+    acc += *pSrc++;
+    
+    acc += *pSrc++;
+  }
+  
+  /* 0 to 3 samples did not fill a group: leave them to the loop below */
+  remaining = blockSize & 0x3U;
+  
+#else
+  
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+  
+#endif /* defined (ARM_MATH_LOOPUNROLL) */
+  
+  /* One sample per iteration */
+  for (; remaining > 0U; remaining--)
+  {
+    /* acc = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+    acc += *pSrc++;
+  }
+  
+  /* Hand the total back to the caller */
+  *pResult = acc;
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+ @} end of Accumulation group
+ */
--- a/Source/StatisticsFunctions/arm_accumulate_q7.c
+++ b/Source/StatisticsFunctions/arm_accumulate_q7.c
@ -0,0 +1,153 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_accumulate_q7.c
+ * Description:  Accumulation value of a Q7 vector
+ *
+ * $Date:        14 July 2022
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup Accumulation
+ @{
+ */
+
+/**
+ @brief         Accumulation value of a Q7 vector.
+ @param[in]     pSrc       points to the input vector
+ @param[in]     blockSize  number of samples in input vector
+ @param[out]    pResult    sum value returned here
+ @return        none
+ 
+ @par           Scaling and Overflow Behavior
+ The function is implemented using a 32-bit internal accumulator.
+ The input is represented in 1.7 format and is accumulated in a 32-bit
+ accumulator in 25.7 format.
+ The accumulator cannot overflow as long as blockSize does not exceed
+ 2^24 samples, and the full precision of intermediate result is preserved.
+ Finally, the accumulator is narrowed to 1.7 format without saturation,
+ so a sum outside the 1.7 range is not preserved in the result.
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+void arm_accumulate_q7(
+                       const q7_t * pSrc,
+                       uint32_t blockSize,
+                       q7_t * pResult)
+{
+  uint32_t  blkCnt;           /* Loop counter, reused by both loops */
+  q7x16_t vecSrc;             /* Sixteen input samples loaded per iteration */
+  q31_t     sum = 0L;         /* Accumulator for the whole vector */
+  
+  /* Vector loop: one iteration per complete group of 16 samples */
+  blkCnt = blockSize >> 4;
+  while (blkCnt > 0U)
+  {
+    vecSrc = vldrbq_s8(pSrc);
+    /*
+     * Add every lane of vecSrc to the running scalar sum
+     */
+    sum = vaddvaq(sum, vecSrc);
+    
+    blkCnt--;
+    pSrc += 16;
+  }
+  /* Tail: the 0 to 15 samples left over by the vector loop */
+  blkCnt = blockSize & 0xF;
+  while (blkCnt > 0U)
+  {
+    /* Add the next leftover sample to the accumulator */
+    sum += *pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+  /* Store the result; the assignment narrows the q31_t sum to q7_t with no saturation step */
+  *pResult = sum;
+}
+#else
+void arm_accumulate_q7(
+                       const q7_t * pSrc,
+                       uint32_t blockSize,
+                       q7_t * pResult)
+{
+  uint32_t blkCnt;                               /* Loop counter, reused by both loops */
+  q31_t sum = 0;                                 /* Accumulator for the whole vector */
+  
+#if defined (ARM_MATH_LOOPUNROLL)
+  q31_t in;                                      /* Word returned by read_q7x4_ia; its four bytes are added separately */
+#endif
+  
+#if defined (ARM_MATH_LOOPUNROLL)
+  
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+  
+  while (blkCnt > 0U)
+  {
+    /* Bytes are isolated by right shift + narrowing cast; the former "in << k" left-shifted a signed value (undefined) */
+    in = read_q7x4_ia (&pSrc);
+    sum += (q7_t) in;
+    sum += (q7_t) (in >> 8U);
+    sum += (q7_t) (in >> 16U);
+    sum +=  (in >> 24U);
+    
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+  
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+  
+#else
+  
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+  
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+  
+  while (blkCnt > 0U)
+  {
+    /* Add the next sample to the accumulator */
+    sum += *pSrc++;
+    
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+  
+  /* sum = A[0] + A[1] + A[2] + ... + A[blockSize-1] */
+  /* Store result to destination: explicit narrowing to q7_t, no saturation */
+  *pResult = (q7_t) sum;
+}
+#endif /* defined(ARM_MATH_MVEI) */
+
+/**
+ @} end of Accumulation group
+ */