Implement some f64 methods

5 years ago · 7a4579d9a9
parent 9674e00294
commit 7a4579d9a9
45 changed files with 3596 additions and 0 deletions
--- a/Include/dsp/basic_math_functions.h
+++ b/Include/dsp/basic_math_functions.h
@ -100,6 +100,21 @@ extern "C"



+/**
+ * @brief Element-by-element multiplication of two 64-bit floating-point vectors.
+ * @param[in]  pSrcA      points to the first input vector
+ * @param[in]  pSrcB      points to the second input vector
+ * @param[out] pDst       points to the output vector
+ * @param[in]  blockSize  number of samples in each vector
+ */
+void arm_mult_f64(
+const float64_t * pSrcA,
+const float64_t * pSrcB,
+	  float64_t * pDst,
+	  uint32_t blockSize);
+
+
+
 /**
   * @brief Floating-point vector addition.
   * @param[in]  pSrcA      points to the first input vector
@ -115,6 +130,21 @@ extern "C"



+/**
+  * @brief Element-by-element addition of two 64-bit floating-point vectors.
+  * @param[in]  pSrcA      points to the first input vector
+  * @param[in]  pSrcB      points to the second input vector
+  * @param[out] pDst       points to the output vector
+  * @param[in]  blockSize  number of samples in each vector
+  */
+ void arm_add_f64(
+ const float64_t * pSrcA,
+ const float64_t * pSrcB,
+	   float64_t * pDst,
+	   uint32_t blockSize);
+
+
+
  /**
   * @brief Q7 vector addition.
   * @param[in]  pSrcA      points to the first input vector
@ -172,6 +202,21 @@ extern "C"



+  /**
+   * @brief 64-bit floating-point vector subtraction.
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
+  void arm_sub_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t blockSize);
+
+
+
  /**
   * @brief Q7 vector subtraction.
   * @param[in]  pSrcA      points to the first input vector
@ -229,6 +274,21 @@ extern "C"



+  /**
+   * @brief Multiplies a 64-bit floating-point vector by a scalar.
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  scale      scale factor to be applied (float64_t)
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
+  void arm_scale_f64(
+  const float64_t * pSrc,
+        float64_t scale,
+        float64_t * pDst,
+        uint32_t blockSize);
+
+
+
  /**
   * @brief Multiplies a Q7 vector by a scalar.
   * @param[in]  pSrc        points to the input vector
@ -302,6 +362,18 @@ extern "C"



+/**
+ * @brief Element-by-element absolute value of a 64-bit floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[out] pDst       points to the output buffer
+ * @param[in]  blockSize  number of samples in each vector
+ */
+void arm_abs_f64(
+const float64_t * pSrc,
+	  float64_t * pDst,
+	  uint32_t blockSize);
+
+

  /**
   * @brief Q15 vector absolute value.
@ -342,6 +414,21 @@ extern "C"



+/**
+ * @brief Dot product of two 64-bit floating-point vectors.
+ * @param[in]  pSrcA      points to the first input vector
+ * @param[in]  pSrcB      points to the second input vector
+ * @param[in]  blockSize  number of samples in each vector
+ * @param[out] result     points to the location that receives the dot product
+ */
+void arm_dot_prod_f64(
+const float64_t * pSrcA,
+const float64_t * pSrcB,
+	  uint32_t blockSize,
+	  float64_t * result);
+
+
+
  /**
   * @brief Dot product of Q7 vectors.
   * @param[in]  pSrcA      points to the first input vector
@ -426,6 +513,21 @@ extern "C"
        uint32_t blockSize);


+/**
+ * @brief  Adds a constant offset to a 64-bit floating-point vector.
+ * @param[in]  pSrc       points to the input vector
+ * @param[in]  offset     is the offset to be added (float64_t)
+ * @param[out] pDst       points to the output vector
+ * @param[in]  blockSize  number of samples in the vector
+ */
+void arm_offset_f64(
+const float64_t * pSrc,
+	  float64_t offset,
+	  float64_t * pDst,
+	  uint32_t blockSize);
+
+
+
  /**
   * @brief  Adds a constant offset to a floating-point vector.
   * @param[in]  pSrc       points to the input vector
@ -495,6 +597,20 @@ extern "C"
        uint32_t blockSize);


+
+/**
+ * @brief  Negates the elements of a 64-bit floating-point vector.
+ * @param[in]  pSrc       points to the input vector
+ * @param[out] pDst       points to the output vector
+ * @param[in]  blockSize  number of samples in the vector
+ */
+void arm_negate_f64(
+const float64_t * pSrc,
+	  float64_t * pDst,
+	  uint32_t blockSize);
+
+
+
  /**
   * @brief  Negates the elements of a Q7 vector.
   * @param[in]  pSrc       points to the input vector
--- a/Include/dsp/complex_math_functions.h
+++ b/Include/dsp/complex_math_functions.h
@ -96,6 +96,18 @@ extern "C"
        uint32_t numSamples);


+  /**
+   * @brief  64-bit floating-point complex magnitude squared
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
+  void arm_cmplx_mag_squared_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t numSamples);
+
+
  /**
   * @brief  Q31 complex magnitude squared
   * @param[in]  pSrc        points to the complex input vector
@ -132,6 +144,18 @@ extern "C"
        uint32_t numSamples);


+/**
+   * @brief  64-bit floating-point complex magnitude
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
+  void arm_cmplx_mag_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t numSamples);
+
+
  /**
   * @brief  Q31 complex magnitude
   * @param[in]  pSrc        points to the complex input vector
@ -288,6 +312,21 @@ extern "C"



+/**
+ * @brief  64-bit floating-point complex-by-complex multiplication
+ * @param[in]  pSrcA       points to the first input vector
+ * @param[in]  pSrcB       points to the second input vector
+ * @param[out] pDst        points to the output vector
+ * @param[in]  numSamples  number of complex samples in each vector
+ */
+void arm_cmplx_mult_cmplx_f64(
+const float64_t * pSrcA,
+const float64_t * pSrcB,
+	  float64_t * pDst,
+	  uint32_t numSamples);
+
+
+
 #ifdef   __cplusplus
 }
 #endif
--- a/Include/dsp/distance_functions.h
+++ b/Include/dsp/distance_functions.h
@ -69,6 +69,17 @@ __attribute__((weak)) float __powisf2(float a, int b);

 float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);

+/**
+ * @brief        Euclidean distance between two 64-bit floating-point vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance (float64_t)
+ *
+ */
+
+float64_t arm_euclidean_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
+
 /**
 * @brief        Bray-Curtis distance between two vectors
 * @param[in]    pA         First vector
@ -106,6 +117,17 @@ float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uin
 float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);


+/**
+ * @brief        Chebyshev distance between two 64-bit floating-point vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance (float64_t)
+ *
+ */
+float64_t arm_chebyshev_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
+
+
 /**
 * @brief        Cityblock (Manhattan) distance between two vectors
 * @param[in]    pA         First vector
@ -116,6 +138,16 @@ float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, ui
 */
 float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);

+/**
+ * @brief        Cityblock (Manhattan) distance between two 64-bit floating-point vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance (float64_t)
+ *
+ */
+float64_t arm_cityblock_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
+
 /**
 * @brief        Correlation distance between two vectors
 *
@ -141,6 +173,18 @@ float32_t arm_correlation_distance_f32(float32_t *pA,float32_t *pB, uint32_t blo

 float32_t arm_cosine_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize);

+/**
+ * @brief        Cosine distance between two 64-bit floating-point vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance (float64_t)
+ *
+ */
+
+float64_t arm_cosine_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize);
+
 /**
 * @brief        Jensen-Shannon distance between two vectors
 *
--- a/Include/dsp/fast_math_functions.h
+++ b/Include/dsp/fast_math_functions.h
@ -149,6 +149,21 @@ extern "C"
        uint32_t blockSize);


+
+/**
+  @brief         64-bit floating-point vector of log values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void arm_vlog_f64(
+  const float64_t * pSrc,
+		float64_t * pDst,
+		uint32_t blockSize);
+
+
+
  /**
   * @brief  q31 vector of log values.
   * @param[in]     pSrc       points to the input vector in q31
@ -185,6 +200,22 @@ extern "C"
        float32_t * pDst,
        uint32_t blockSize);

+
+
+/**
+  @brief         64-bit floating-point vector of exp values.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+  void arm_vexp_f64(
+  const float64_t * pSrc,
+		float64_t * pDst,
+		uint32_t blockSize);
+
+
+
 /**
   * @defgroup SQRT Square Root
   *
--- a/Include/dsp/filtering_functions.h
+++ b/Include/dsp/filtering_functions.h
@ -90,6 +90,16 @@ extern "C"
    const float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
  } arm_fir_instance_f32;

+  /**
+   * @brief Instance structure for the 64-bit floating-point FIR filter.
+   */
+  typedef struct
+  {
+          uint16_t numTaps;     /**< number of filter coefficients in the filter. */
+          float64_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const float64_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+  } arm_fir_instance_f64;
+
  /**
   * @brief Processing function for the Q7 FIR filter.
   * @param[in]  S          points to an instance of the Q7 FIR filter structure.
@ -226,6 +236,19 @@ extern "C"
        float32_t * pDst,
        uint32_t blockSize);

+  /**
+   * @brief Processing function for the 64-bit floating-point FIR filter.
+   * @param[in]  S          points to an instance of the 64-bit floating-point FIR structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
+  void arm_fir_f64(
+  const arm_fir_instance_f64 * S,
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
+
  /**
   * @brief  Initialization function for the floating-point FIR filter.
   * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
@ -241,6 +264,21 @@ extern "C"
        float32_t * pState,
        uint32_t blockSize);

+  /**
+   * @brief  Initialization function for the 64-bit floating-point FIR filter.
+   * @param[in,out] S          points to an instance of the 64-bit floating-point FIR filter structure.
+   * @param[in]     numTaps    Number of filter coefficients in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of samples that are processed at a time.
+   */
+  void arm_fir_init_f64(
+        arm_fir_instance_f64 * S,
+        uint16_t numTaps,
+  const float64_t * pCoeffs,
+        float64_t * pState,
+        uint32_t blockSize);
+
  /**
   * @brief Instance structure for the Q15 Biquad cascade filter.
   */
@ -1796,6 +1834,22 @@ void arm_biquad_cascade_df2T_compute_coefs_f32(
        float32_t * pDst);


+  /**
+   * @brief Correlation of 64-bit floating-point sequences.
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data; its length is 2 * max(srcALen, srcBLen) - 1.
+   */
+  void arm_correlate_f64(
+  const float64_t * pSrcA,
+        uint32_t srcALen,
+  const float64_t * pSrcB,
+        uint32_t srcBLen,
+        float64_t * pDst);
+
+
 /**
 @brief Correlation of Q15 sequences
 @param[in]  pSrcA     points to the first input sequence
--- a/Include/dsp/statistics_functions.h
+++ b/Include/dsp/statistics_functions.h
@ -170,6 +170,18 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
        float32_t * pResult);


+  /**
+   * @brief  Sum of the squares of the elements of a 64-bit floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void arm_power_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
+
  /**
   * @brief  Sum of the squares of the elements of a Q15 vector.
   * @param[in]  pSrc       is input pointer
@ -242,6 +254,18 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
        float32_t * pResult);


+  /**
+   * @brief  Mean value of a 64-bit floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void arm_mean_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
+
  /**
   * @brief  Variance of the elements of a floating-point vector.
   * @param[in]  pSrc       is input pointer
@ -254,6 +278,18 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
        float32_t * pResult);


+  /**
+   * @brief  Variance of the elements of a 64-bit floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void arm_var_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
+
  /**
   * @brief  Variance of the elements of a Q31 vector.
   * @param[in]  pSrc       is input pointer
@ -326,6 +362,18 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
        float32_t * pResult);


+  /**
+   * @brief  Standard deviation of the elements of a 64-bit floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
+  void arm_std_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult);
+
+
  /**
   * @brief  Standard deviation of the elements of a Q31 vector.
   * @param[in]  pSrc       is input pointer
@ -459,6 +507,33 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
        uint32_t * pIndex);


+  /**
+   * @brief  Minimum value of a 64-bit floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void arm_min_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
+
+  /**
+   * @brief  Minimum value of absolute values of a 64-bit floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
+  void arm_absmin_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
+
+
 /**
 * @brief Maximum value of a Q7 vector.
 * @param[in]  pSrc       points to the input buffer
@ -564,6 +639,32 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
        float32_t * pResult,
        uint32_t * pIndex);

+/**
+ * @brief Maximum value of a 64-bit floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void arm_max_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of absolute values of a 64-bit floating-point vector.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
+ */
+  void arm_absmax_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex);
+
  /**
    @brief         Maximum value of a floating-point vector.
    @param[in]     pSrc       points to the input vector
@ -576,6 +677,17 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA,
      uint32_t   blockSize,
      float32_t *pResult);

+  /**
+    @brief         Maximum value of a 64-bit floating-point vector (value only, no index output).
+    @param[in]     pSrc       points to the input vector
+    @param[in]     blockSize  number of samples in input vector
+    @param[out]    pResult    maximum value returned here
+    @return        none
+   */
+  void arm_max_no_idx_f64(
+      const float64_t *pSrc,
+      uint32_t   blockSize,
+      float64_t *pResult);



--- a/Include/dsp/support_functions.h
+++ b/Include/dsp/support_functions.h
@ -295,6 +295,20 @@ extern "C"
        float32_t * pDst,
        uint32_t blockSize);

+ 
+ 
+  /**
+   * @brief  Copies the elements of a 64-bit floating-point vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void arm_copy_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
+
+

  /**
   * @brief  Copies the elements of a Q7 vector.
@ -344,6 +358,18 @@ extern "C"
        uint32_t blockSize);


+  /**
+   * @brief  Fills a constant value into a 64-bit floating-point vector.
+   * @param[in]  value      input value to be filled (float64_t)
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void arm_fill_f64(
+        float64_t value,
+        float64_t * pDst,
+        uint32_t blockSize);
+
+
  /**
   * @brief  Fills a constant value into a Q7 vector.
   * @param[in]  value      input value to be filled
--- a/Source/BasicMathFunctions/BasicMathFunctions.c
+++ b/Source/BasicMathFunctions/BasicMathFunctions.c
@ -27,10 +27,12 @@
 */

 #include "arm_abs_f32.c"
+#include "arm_abs_f64.c"
 #include "arm_abs_q15.c"
 #include "arm_abs_q31.c"
 #include "arm_abs_q7.c"
 #include "arm_add_f32.c"
+#include "arm_add_f64.c"
 #include "arm_add_q15.c"
 #include "arm_add_q31.c"
 #include "arm_add_q7.c"
@ -38,14 +40,17 @@
 #include "arm_and_u32.c"
 #include "arm_and_u8.c"
 #include "arm_dot_prod_f32.c"
+#include "arm_dot_prod_f64.c"
 #include "arm_dot_prod_q15.c"
 #include "arm_dot_prod_q31.c"
 #include "arm_dot_prod_q7.c"
 #include "arm_mult_f32.c"
+#include "arm_mult_f64.c"
 #include "arm_mult_q15.c"
 #include "arm_mult_q31.c"
 #include "arm_mult_q7.c"
 #include "arm_negate_f32.c"
+#include "arm_negate_f64.c"
 #include "arm_negate_q15.c"
 #include "arm_negate_q31.c"
 #include "arm_negate_q7.c"
@ -53,6 +58,7 @@
 #include "arm_not_u32.c"
 #include "arm_not_u8.c"
 #include "arm_offset_f32.c"
+#include "arm_offset_f64.c"
 #include "arm_offset_q15.c"
 #include "arm_offset_q31.c"
 #include "arm_offset_q7.c"
@ -60,6 +66,7 @@
 #include "arm_or_u32.c"
 #include "arm_or_u8.c"
 #include "arm_scale_f32.c"
+#include "arm_scale_f64.c"
 #include "arm_scale_q15.c"
 #include "arm_scale_q31.c"
 #include "arm_scale_q7.c"
@ -67,6 +74,7 @@
 #include "arm_shift_q31.c"
 #include "arm_shift_q7.c"
 #include "arm_sub_f32.c"
+#include "arm_sub_f64.c"
 #include "arm_sub_q15.c"
 #include "arm_sub_q31.c"
 #include "arm_sub_q7.c"
--- a/Source/BasicMathFunctions/arm_abs_f64.c
+++ b/Source/BasicMathFunctions/arm_abs_f64.c
@ -0,0 +1,88 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_abs_f64.c
+ * Description:  Floating-point vector absolute value
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+#include <math.h>
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicAbs Vector Absolute Value
+
+  Computes the absolute value of a vector on an element-by-element basis.
+
+  <pre>
+      pDst[n] = abs(pSrc[n]),   0 <= n < blockSize.
+  </pre>
+
+  The functions support in-place computation allowing the source and
+  destination pointers to reference the same memory buffer.
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicAbs
+  @{
+ */
+
+/**
+  @brief         Absolute value of each element of a 64-bit floating-point vector.
+  @param[in]     pSrc       points to the input vector (blockSize samples are read)
+  @param[out]    pDst       points to the output vector (blockSize samples are written)
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+void arm_abs_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  const float64_t *pIn = pSrc;                   /* Read cursor */
+  float64_t *pOut = pDst;                        /* Write cursor */
+  uint32_t remaining = blockSize;                /* Samples left to process */
+
+  /* pDst[n] = |pSrc[n]| for 0 <= n < blockSize */
+  for (; remaining != 0U; remaining--)
+  {
+    float64_t sample;
+
+    /* Fetch the next input sample */
+    sample = *pIn++;
+
+    /* Store its magnitude in the output */
+    *pOut++ = fabs(sample);
+  }
+
+}
+
+/**
+  @} end of BasicAbs group
+ */
--- a/Source/BasicMathFunctions/arm_add_f64.c
+++ b/Source/BasicMathFunctions/arm_add_f64.c
@ -0,0 +1,87 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_add_f64.c
+ * Description:  Floating-point vector addition
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicAdd Vector Addition
+
+  Element-by-element addition of two vectors.
+
+  <pre>
+      pDst[n] = pSrcA[n] + pSrcB[n],   0 <= n < blockSize.
+  </pre>
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicAdd
+  @{
+ */
+
+/**
+  @brief         Element-by-element sum of two 64-bit floating-point vectors.
+  @param[in]     pSrcA      points to first input vector (blockSize samples are read)
+  @param[in]     pSrcB      points to second input vector (blockSize samples are read)
+  @param[out]    pDst       points to output vector (blockSize samples are written)
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+
+void arm_add_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t idx;                                  /* Sample index */
+  float64_t a;                                   /* Current sample of pSrcA */
+  float64_t b;                                   /* Current sample of pSrcB */
+
+  /* Walk both inputs in lockstep, one sample per iteration */
+  for (idx = 0U; idx < blockSize; idx++)
+  {
+    /* Load both operands before the store, */
+    /* so the write never precedes a read of the same index */
+    a = pSrcA[idx];
+    b = pSrcB[idx];
+
+    /* pDst[n] = pSrcA[n] + pSrcB[n] */
+    pDst[idx] = a + b;
+  }
+
+}
+
+/**
+  @} end of BasicAdd group
+ */
--- a/Source/BasicMathFunctions/arm_dot_prod_f64.c
+++ b/Source/BasicMathFunctions/arm_dot_prod_f64.c
@ -0,0 +1,91 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_dot_prod_f64.c
+ * Description:  Floating-point dot product
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicDotProd Vector Dot Product
+
+  Computes the dot product of two vectors.
+  The vectors are multiplied element-by-element and then summed.
+
+  <pre>
+      sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
+  </pre>
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicDotProd
+  @{
+ */
+
+/**
+  @brief         Dot product of two 64-bit floating-point vectors.
+  @param[in]     pSrcA      points to the first input vector (blockSize samples are read).
+  @param[in]     pSrcB      points to the second input vector (blockSize samples are read).
+  @param[in]     blockSize  number of samples in each vector.
+  @param[out]    result     receives the accumulated sum; 0.0 is stored when blockSize is 0.
+  @return        none
+ */
+
+void arm_dot_prod_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        uint32_t blockSize,
+        float64_t * result)
+{
+  uint32_t blkCnt;                               /* Loop counter */
+  float64_t sum = 0.0;                           /* Accumulator; double literal matches float64_t */
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+
+    /* Multiply the current pair of samples and add the product to the running sum. */
+    sum += (*pSrcA++) * (*pSrcB++);
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Written unconditionally, so *result is defined even for an empty input */
+  *result = sum;
+}
+
+/**
+  @} end of BasicDotProd group
+ */
--- a/Source/BasicMathFunctions/arm_mult_f64.c
+++ b/Source/BasicMathFunctions/arm_mult_f64.c
@ -0,0 +1,87 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mult_f64.c
+ * Description:  Floating-point vector multiplication
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicMult Vector Multiplication
+
+  Element-by-element multiplication of two vectors.
+
+  <pre>
+      pDst[n] = pSrcA[n] * pSrcB[n],   0 <= n < blockSize.
+  </pre>
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicMult
+  @{
+ */
+
+/**
+  @brief         Element-by-element product of two 64-bit floating-point vectors.
+  @param[in]     pSrcA      points to the first input vector (blockSize samples are read).
+  @param[in]     pSrcB      points to the second input vector (blockSize samples are read).
+  @param[out]    pDst       points to the output vector (blockSize samples are written).
+  @param[in]     blockSize  number of samples in each vector.
+  @return        none
+ */
+
+void arm_mult_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  const float64_t *pA = pSrcA;                   /* Cursor into first input */
+  const float64_t *pB = pSrcB;                   /* Cursor into second input */
+  float64_t *pOut = pDst;                        /* Cursor into output */
+  uint32_t left;                                 /* Samples still to process */
+
+  /* pDst[n] = pSrcA[n] * pSrcB[n] for 0 <= n < blockSize */
+  for (left = blockSize; left != 0U; left--)
+  {
+    const float64_t product = (*pA) * (*pB);
+    /* Store the product, then advance all three cursors together */
+    *pOut = product;
+    pA++;
+    pB++;
+    pOut++;
+  }
+
+}
+
+/**
+  @} end of BasicMult group
+ */
--- a/Source/BasicMathFunctions/arm_negate_f64.c
+++ b/Source/BasicMathFunctions/arm_negate_f64.c
@ -0,0 +1,87 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_negate_f64.c
+ * Description:  Negates floating-point vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicNegate Vector Negate
+
+  Negates the elements of a vector.
+
+  <pre>
+      pDst[n] = -pSrc[n],   0 <= n < blockSize.
+  </pre>
+
+  The functions support in-place computation allowing the source and
+  destination pointers to reference the same memory buffer.
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicNegate
+  @{
+ */
+
+/**
+  @brief         Sign-inverts every element of a floating-point vector.
+  @param[in]     pSrc       points to input vector.
+  @param[out]    pDst       points to output vector; receives -pSrc[n].
+  @param[in]     blockSize  number of samples in each vector; 0 does nothing.
+  @return        none
+ */
+
+void arm_negate_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t remaining;                            /* Samples left to process */
+
+  /* Count down from blockSize, advancing both pointers once per sample. */
+  for (remaining = blockSize; remaining != 0U; remaining--)
+  {
+    /* C = -A */
+    /* Fetch the sample before storing, so pDst may be the same buffer as pSrc. */
+    const float64_t sample = *pSrc;
+
+    *pDst = -sample;
+
+    /* Move on to the next sample */
+    pSrc++;
+    pDst++;
+  }
+
+}
+
+/**
+  @} end of BasicNegate group
+ */
--- a/Source/BasicMathFunctions/arm_offset_f64.c
+++ b/Source/BasicMathFunctions/arm_offset_f64.c
@ -0,0 +1,89 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_offset_f64.c
+ * Description:  Floating-point vector offset
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicOffset Vector Offset
+
+  Adds a constant offset to each element of a vector.
+
+  <pre>
+      pDst[n] = pSrc[n] + offset,   0 <= n < blockSize.
+  </pre>
+
+  The functions support in-place computation allowing the source and
+  destination pointers to reference the same memory buffer.
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicOffset
+  @{
+ */
+
+/**
+  @brief         Shifts every element of a floating-point vector by a constant.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     offset     value added to each input sample
+  @param[out]    pDst       points to the output vector; receives pSrc[n] + offset
+  @param[in]     blockSize  number of samples in each vector; 0 does nothing
+  @return        none
+ */
+
+void arm_offset_f64(
+  const float64_t * pSrc,
+        float64_t offset,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t idx;                                  /* Sample index */
+
+  /*
+   * Visit samples in ascending order. Each input is read before the
+   * matching output is stored.
+   */
+  for (idx = 0U; idx < blockSize; idx++)
+  {
+    /* C = A + offset */
+
+    const float64_t sample = pSrc[idx];
+
+    /* Apply the offset and store the result in the destination buffer. */
+    pDst[idx] = sample + offset;
+  }
+
+}
+
+/**
+  @} end of BasicOffset group
+ */
--- a/Source/BasicMathFunctions/arm_scale_f64.c
+++ b/Source/BasicMathFunctions/arm_scale_f64.c
@ -0,0 +1,102 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_scale_f64.c
+ * Description:  Multiplies a floating-point vector by a scalar
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicScale Vector Scale
+
+  Multiply a vector by a scalar value.  For floating-point data, the algorithm used is:
+
+  <pre>
+      pDst[n] = pSrc[n] * scale,   0 <= n < blockSize.
+  </pre>
+
+  In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
+  a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
+  The shift allows the gain of the scaling operation to exceed 1.0.
+  The algorithm used with fixed-point data is:
+
+  <pre>
+      pDst[n] = (pSrc[n] * scaleFract) << shift,   0 <= n < blockSize.
+  </pre>
+
+  The overall scale factor applied to the fixed-point data is
+  <pre>
+      scale = scaleFract * 2^shift.
+  </pre>
+
+  The functions support in-place computation allowing the source and destination
+  pointers to reference the same memory buffer.
+ */
+
+/**
+  @addtogroup BasicScale
+  @{
+ */
+
+/**
+  @brief         Applies a scalar gain to a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     scale      factor each input sample is multiplied by
+  @param[out]    pDst       points to the output vector; receives pSrc[n] * scale
+  @param[in]     blockSize  number of samples in each vector; 0 does nothing
+  @return        none
+ */
+
+void arm_scale_f64(
+  const float64_t *pSrc,
+        float64_t scale,
+        float64_t *pDst,
+        uint32_t blockSize)
+{
+  uint32_t remaining;                            /* Samples left to process */
+
+  /* One multiplication per sample until the count reaches zero. */
+  for (remaining = blockSize; remaining != 0U; remaining--)
+  {
+    /* C = A * scale */
+    const float64_t sample = *pSrc;
+
+    /* Scale the sample and store the result in the destination buffer. */
+    *pDst = sample * scale;
+
+    /* Move on to the next sample */
+    pSrc++;
+    pDst++;
+  }
+
+}
+
+/**
+  @} end of BasicScale group
+ */
--- a/Source/BasicMathFunctions/arm_sub_f64.c
+++ b/Source/BasicMathFunctions/arm_sub_f64.c
@ -0,0 +1,87 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_sub_f64.c
+ * Description:  Floating-point vector subtraction
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/basic_math_functions.h"
+
+/**
+  @ingroup groupMath
+ */
+
+/**
+  @defgroup BasicSub Vector Subtraction
+
+  Element-by-element subtraction of two vectors.
+
+  <pre>
+      pDst[n] = pSrcA[n] - pSrcB[n],   0 <= n < blockSize.
+  </pre>
+
+  There are separate functions for floating-point, Q7, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup BasicSub
+  @{
+ */
+
+/**
+  @brief         Element-wise difference of two floating-point vectors.
+  @param[in]     pSrcA      points to the first input vector (minuend)
+  @param[in]     pSrcB      points to the second input vector (subtrahend)
+  @param[out]    pDst       points to the output vector; receives pSrcA[n] - pSrcB[n]
+  @param[in]     blockSize  number of samples in each vector; 0 does nothing
+  @return        none
+ */
+
+void arm_sub_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t idx;                                  /* Sample index */
+
+  /*
+   * Process the vectors front to back, one sample per iteration.
+   * Both operands are read before the result is stored.
+   */
+  for (idx = 0U; idx < blockSize; idx++)
+  {
+    /* C = A - B */
+    const float64_t minuend    = pSrcA[idx];
+    const float64_t subtrahend = pSrcB[idx];
+
+    /* Subtract and store result in destination buffer. */
+    pDst[idx] = minuend - subtrahend;
+  }
+
+}
+
+/**
+  @} end of BasicSub group
+ */
--- a/Source/ComplexMathFunctions/ComplexMathFunctions.c
+++ b/Source/ComplexMathFunctions/ComplexMathFunctions.c
@ -33,6 +33,7 @@
 #include "arm_cmplx_dot_prod_q15.c"
 #include "arm_cmplx_dot_prod_q31.c"
 #include "arm_cmplx_mag_f32.c"
+#include "arm_cmplx_mag_f64.c"

 #if (defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI))  && !defined(ARM_MATH_AUTOVECTORIZE)
  #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q15_MVE)
@ -48,9 +49,11 @@
 #endif

 #include "arm_cmplx_mag_squared_f32.c"
+#include "arm_cmplx_mag_squared_f64.c"
 #include "arm_cmplx_mag_squared_q15.c"
 #include "arm_cmplx_mag_squared_q31.c"
 #include "arm_cmplx_mult_cmplx_f32.c"
+#include "arm_cmplx_mult_cmplx_f64.c"
 #include "arm_cmplx_mult_cmplx_q15.c"
 #include "arm_cmplx_mult_cmplx_q31.c"
 #include "arm_cmplx_mult_real_f32.c"
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_f64.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_f64.c
@ -0,0 +1,100 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mag_f64.c
+ * Description:  Floating-point complex magnitude
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/complex_math_functions.h"
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @defgroup cmplx_mag Complex Magnitude
+
+  Computes the magnitude of the elements of a complex data vector.
+
+  <code>pSrc</code> points to the source data and
+  <code>pDst</code> points to where the result should be written.
+  <code>numSamples</code> specifies the number of complex samples
+  in the input array and the data is stored in an interleaved fashion
+  (real, imag, real, imag, ...).
+  The input array has a total of <code>2*numSamples</code> values;
+  the output array has a total of <code>numSamples</code> values.
+
+  The underlying algorithm used is:
+
+  <pre>
+  for (n = 0; n < numSamples; n++) {
+      pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
+  }
+  </pre>
+
+  There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup cmplx_mag
+  @{
+ */
+
+/**
+  @brief         Magnitude of each sample of an interleaved complex vector.
+  @param[in]     pSrc        points to input vector (real, imag pairs)
+  @param[out]    pDst        points to output vector; one value per complex sample
+  @param[in]     numSamples  number of complex samples; 0 does nothing
+  @return        none
+ */
+void arm_cmplx_mag_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t numSamples)
+{
+  uint32_t remaining;                            /* Complex samples left to process */
+  float64_t re, im;                              /* Parts of the current sample */
+
+  /*
+   * Each iteration consumes one (real, imag) pair from pSrc and
+   * produces exactly one magnitude in pDst.
+   */
+  for (remaining = numSamples; remaining != 0U; remaining--)
+  {
+    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+
+    re = pSrc[0];
+    im = pSrc[1];
+    pSrc += 2;
+
+    /* Store result in destination buffer. */
+    *pDst = sqrt((re * re) + (im * im));
+    pDst++;
+  }
+
+}
+
+/**
+  @} end of cmplx_mag group
+ */
--- a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f64.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f64.c
@ -0,0 +1,100 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mag_squared_f64.c
+ * Description:  Floating-point complex magnitude squared
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/complex_math_functions.h"
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @defgroup cmplx_mag_squared Complex Magnitude Squared
+
+  Computes the magnitude squared of the elements of a complex data vector.
+
+  <code>pSrc</code> points to the source data and
+  <code>pDst</code> points to where the result should be written.
+  <code>numSamples</code> specifies the number of complex samples
+  in the input array and the data is stored in an interleaved fashion
+  (real, imag, real, imag, ...).
+  The input array has a total of <code>2*numSamples</code> values;
+  the output array has a total of <code>numSamples</code> values.
+
+  The underlying algorithm used is:
+
+  <pre>
+  for (n = 0; n < numSamples; n++) {
+      pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
+  }
+  </pre>
+
+  There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup cmplx_mag_squared
+  @{
+ */
+
+/**
+  @brief         Squared magnitude of each sample of an interleaved complex vector.
+  @param[in]     pSrc        points to input vector;
+                             values are read as (real, imag) pairs
+  @param[out]    pDst        points to output vector;
+                             receives one value per complex sample
+  @param[in]     numSamples  number of complex samples; 0 does nothing
+  @return        none
+ */
+void arm_cmplx_mag_squared_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t numSamples)
+{
+  uint32_t n;                                    /* Output sample index */
+  float64_t re, im;                              /* Parts of the current sample */
+
+  /*
+   * pSrc advances by two values per iteration (one interleaved pair);
+   * pDst receives exactly one value per pair.
+   */
+  for (n = 0U; n < numSamples; n++)
+  {
+    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
+
+    re = *pSrc++;
+    im = *pSrc++;
+
+    /* Store result in destination buffer. */
+    pDst[n] = (re * re) + (im * im);
+  }
+
+}
+
+/**
+  @} end of cmplx_mag_squared group
+ */
--- a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f64.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f64.c
@ -0,0 +1,105 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cmplx_mult_cmplx_f64.c
+ * Description:  Floating-point complex-by-complex multiplication
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/complex_math_functions.h"
+
+/**
+  @ingroup groupCmplxMath
+ */
+
+/**
+  @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
+
+  Multiplies a complex vector by another complex vector and generates a complex result.
+  The data in the complex arrays is stored in an interleaved fashion
+  (real, imag, real, imag, ...).
+  The parameter <code>numSamples</code> represents the number of complex
+  samples processed.  The complex arrays have a total of <code>2*numSamples</code>
+  real values.
+
+  The underlying algorithm used is:
+
+  <pre>
+  for (n = 0; n < numSamples; n++) {
+      pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
+      pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
+  }
+  </pre>
+
+  There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+  @addtogroup CmplxByCmplxMult
+  @{
+ */
+
+/**
+  @brief         Element-wise product of two interleaved complex vectors.
+  @param[in]     pSrcA       points to first input vector (real, imag pairs)
+  @param[in]     pSrcB       points to second input vector (real, imag pairs)
+  @param[out]    pDst        points to output vector (real, imag pairs)
+  @param[in]     numSamples  number of complex samples; 0 does nothing
+  @return        none
+ */
+
+void arm_cmplx_mult_cmplx_f64(
+  const float64_t * pSrcA,
+  const float64_t * pSrcB,
+        float64_t * pDst,
+        uint32_t numSamples)
+{
+  uint32_t remaining;                            /* Complex samples left to process */
+  float64_t reA, imA, reB, imB;                  /* Parts of the current operands */
+
+  /* All four inputs of a sample are loaded before either output is stored. */
+  for (remaining = numSamples; remaining != 0U; remaining--)
+  {
+    /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
+    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
+
+    reA = pSrcA[0];
+    imA = pSrcA[1];
+    reB = pSrcB[0];
+    imB = pSrcB[1];
+
+    /* Store result in destination buffer. */
+    pDst[0] = (reA * reB) - (imA * imB);
+    pDst[1] = (reA * imB) + (imA * reB);
+
+    /* Advance every pointer past the pair just handled */
+    pSrcA += 2;
+    pSrcB += 2;
+    pDst  += 2;
+  }
+
+}
+
+/**
+  @} end of CmplxByCmplxMult group
+ */
--- a/Source/DistanceFunctions/DistanceFunctions.c
+++ b/Source/DistanceFunctions/DistanceFunctions.c
@ -30,11 +30,15 @@
 #include "arm_braycurtis_distance_f32.c"
 #include "arm_canberra_distance_f32.c"
 #include "arm_chebyshev_distance_f32.c"
+#include "arm_chebyshev_distance_f64.c"
 #include "arm_cityblock_distance_f32.c"
+#include "arm_cityblock_distance_f64.c"
 #include "arm_correlation_distance_f32.c"
 #include "arm_cosine_distance_f32.c"
+#include "arm_cosine_distance_f64.c"
 #include "arm_dice_distance.c"
 #include "arm_euclidean_distance_f32.c"
+#include "arm_euclidean_distance_f64.c"
 #include "arm_hamming_distance.c"
 #include "arm_jaccard_distance.c"
 #include "arm_jensenshannon_distance_f32.c"
--- a/Source/DistanceFunctions/arm_chebyshev_distance_f64.c
+++ b/Source/DistanceFunctions/arm_chebyshev_distance_f64.c
@ -0,0 +1,76 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_chebyshev_distance_f64.c
+ * Description:  Chebyshev distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup Chebyshev
+  @{
+ */
+
+
+/**
+ * @brief        Chebyshev distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length; 0 is accepted and yields 0.0
+ * @return       largest |pA[n] - pB[n]| over all n
+ *
+ */
+float64_t arm_chebyshev_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+   float64_t diff, maxVal = 0.0;
+   uint32_t i;
+
+   /* Empty input: do not read pA[0]/pB[0] or let the count wrap past zero. */
+   if (blockSize == 0U)
+   {
+      return(maxVal);
+   }
+
+   /* Seed the running maximum with the first pair. */
+   maxVal = fabs(pA[0] - pB[0]);
+   for (i = 1U; i < blockSize; i++)
+   {
+      diff = fabs(pA[i] - pB[i]);
+      if (diff > maxVal)
+      {
+        maxVal = diff;
+      }
+   }
+
+   return(maxVal);
+}
+
+/**
+ * @} end of Chebyshev group
+ */
--- a/Source/DistanceFunctions/arm_cityblock_distance_f64.c
+++ b/Source/DistanceFunctions/arm_cityblock_distance_f64.c
@ -0,0 +1,67 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cityblock_distance_f64.c
+ * Description:  Cityblock (Manhattan) distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+/**
+  @addtogroup Manhattan
+  @{
+ */
+
+
+/**
+ * @brief        Cityblock (Manhattan) distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return       sum of |pA[n] - pB[n]| over all n (0.0 when blockSize is 0)
+ *
+ */
+float64_t arm_cityblock_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+   float64_t total = 0.0;
+   uint32_t k;
+
+   /* Add up the absolute difference of each element pair. */
+   for (k = 0U; k < blockSize; k++)
+   {
+      const float64_t a = pA[k];
+      const float64_t b = pB[k];
+
+      total += fabs(a - b);
+   }
+
+   return(total);
+}
+
+/**
+ * @} end of Manhattan group
+ */
--- a/Source/DistanceFunctions/arm_cosine_distance_f64.c
+++ b/Source/DistanceFunctions/arm_cosine_distance_f64.c
@ -0,0 +1,70 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cosine_distance_f64.c
+ * Description:  Cosine distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup CosineDist
+  @{
+ */
+
+
+
+/**
+ * @brief        Cosine distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return       1 - dot / sqrt(pwrA * pwrB), from arm_dot_prod_f64 and arm_power_f64
+ *
+ */
+
+float64_t arm_cosine_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+    float64_t energyA, energyB, inner, norm;
+
+    /* Power of each operand, then the cross term between them. */
+    arm_power_f64(pA, blockSize, &energyA);
+    arm_power_f64(pB, blockSize, &energyB);
+    arm_dot_prod_f64(pA, pB, blockSize, &inner);
+
+    /* No guard for norm == 0: the division below is performed as-is. */
+    norm = sqrt(energyA * energyB);
+    return(1.0 - inner / norm);
+}
+
+
+
+/**
+ * @} end of CosineDist group
+ */
--- a/Source/DistanceFunctions/arm_euclidean_distance_f64.c
+++ b/Source/DistanceFunctions/arm_euclidean_distance_f64.c
@ -0,0 +1,66 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_euclidean_distance_f64.c
+ * Description:  Euclidean distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+/**
+  @addtogroup Euclidean
+  @{
+ */
+
+
+/**
+ * @brief        Euclidean distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return       square root of the accumulated SQ(pA[n] - pB[n]) terms
+ *
+ */
+float64_t arm_euclidean_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+   float64_t sumSq = 0.0, delta;
+   uint32_t k;
+
+   /* Accumulate the squared difference of each element pair. */
+   for (k = 0U; k < blockSize; k++)
+   {
+      delta = pA[k] - pB[k];
+      sumSq += SQ(delta);
+   }
+   return(sqrt(sumSq));
+}
+
+/**
+ * @} end of Euclidean group
+ */
--- a/Source/FastMathFunctions/FastMathFunctions.c
+++ b/Source/FastMathFunctions/FastMathFunctions.c
@ -57,7 +57,9 @@
 #include "arm_sqrt_q15.c"
 #include "arm_sqrt_q31.c"
 #include "arm_vexp_f32.c"
+#include "arm_vexp_f64.c"
 #include "arm_vlog_f32.c"
+#include "arm_vlog_f64.c"
 #include "arm_divide_q15.c"
 #include "arm_vlog_q31.c"
 #include "arm_vlog_q15.c"
--- a/Source/FastMathFunctions/arm_vexp_f64.c
+++ b/Source/FastMathFunctions/arm_vexp_f64.c
@ -0,0 +1,51 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_vexp_f64.c
+ * Description:  Fast vectorized exp
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/fast_math_functions.h"
+#include "arm_common_tables.h"
+
+/**
+  @brief         Element-wise exp() of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector; receives exp(pSrc[n])
+  @param[in]     blockSize  number of samples in each vector; 0 does nothing
+  @return        none
+ */
+void arm_vexp_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+   uint32_t remaining;
+
+   /* C = exp(A), one sample per iteration */
+   for (remaining = blockSize; remaining != 0U; remaining--)
+   {
+      *pDst++ = exp(*pSrc++);
+   }
+}
--- a/Source/FastMathFunctions/arm_vlog_f64.c
+++ b/Source/FastMathFunctions/arm_vlog_f64.c
@ -0,0 +1,51 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_vlog_f64.c
+ * Description:  Fast vectorized log
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/fast_math_functions.h"
+#include "arm_common_tables.h"
+
+/**
+  @brief         Element-wise natural logarithm of a double precision floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[out]    pDst       points to the output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+void arm_vlog_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+   uint32_t remaining;                            /* Samples still to process */
+
+   /* C = log(A) */
+   for (remaining = blockSize; remaining > 0U; remaining--)
+   {
+      /* Evaluate log() on the current sample, store it and advance both pointers. */
+      *pDst++ = log(*pSrc++);
+   }
+}
--- a/Source/FilteringFunctions/FilteringFunctions.c
+++ b/Source/FilteringFunctions/FilteringFunctions.c
@ -61,6 +61,7 @@
 #include "arm_conv_q31.c"
 #include "arm_conv_q7.c"
 #include "arm_correlate_f32.c"
+#include "arm_correlate_f64.c"
 #include "arm_correlate_fast_opt_q15.c"
 #include "arm_correlate_fast_q15.c"
 #include "arm_correlate_fast_q31.c"
@ -78,9 +79,11 @@
 #include "arm_fir_decimate_q15.c"
 #include "arm_fir_decimate_q31.c"
 #include "arm_fir_f32.c"
+#include "arm_fir_f64.c"
 #include "arm_fir_fast_q15.c"
 #include "arm_fir_fast_q31.c"
 #include "arm_fir_init_f32.c"
+#include "arm_fir_init_f64.c"
 #include "arm_fir_init_q15.c"
 #include "arm_fir_init_q31.c"
 #include "arm_fir_init_q7.c"
--- a/Source/FilteringFunctions/arm_correlate_f64.c
+++ b/Source/FilteringFunctions/arm_correlate_f64.c
@ -0,0 +1,411 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_correlate_f64.c
+ * Description:  Correlation of floating-point sequences
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/filtering_functions.h"
+
+/**
+  @ingroup groupFilters
+ */
+
+/**
+  @defgroup Corr Correlation
+
+  Correlation is a mathematical operation that is similar to convolution.
+  As with convolution, correlation uses two signals to produce a third signal.
+  The underlying algorithms in correlation and convolution are identical except that one of the inputs is flipped in convolution.
+  Correlation is commonly used to measure the similarity between two signals.
+  It has applications in pattern recognition, cryptanalysis, and searching.
+  The CMSIS library provides correlation functions for Q7, Q15, Q31 and floating-point data types.
+  Fast versions of the Q15 and Q31 functions are also provided.
+
+  @par           Algorithm
+                   Let <code>a[n]</code> and <code>b[n]</code> be sequences of length <code>srcALen</code> and <code>srcBLen</code> samples respectively.
+                   The convolution of the two signals is denoted by
+  <pre>
+      c[n] = a[n] * b[n]
+  </pre>
+                   In correlation, one of the signals is flipped in time
+  <pre>
+       c[n] = a[n] * b[-n]
+  </pre>
+  @par
+                   and this is mathematically defined as
+                   \image html CorrelateEquation.gif
+  @par
+                   The <code>pSrcA</code> points to the first input vector of length <code>srcALen</code> and <code>pSrcB</code> points to the second input vector of length <code>srcBLen</code>.
+                   The result <code>c[n]</code> is of length <code>2 * max(srcALen, srcBLen) - 1</code> and is defined over the interval <code>n=0, 1, 2, ..., (2 * max(srcALen, srcBLen) - 2)</code>.
+                   The output result is written to <code>pDst</code> and the calling function must allocate <code>2 * max(srcALen, srcBLen) - 1</code> words for the result.
+
+  @note
+                   The <code>pDst</code> should be initialized to all zeros before being used.
+
+  @par           Fixed-Point Behavior
+                   Correlation requires summing up a large number of intermediate products.
+                   As such, the Q7, Q15, and Q31 functions run a risk of overflow and saturation.
+                   Refer to the function specific documentation below for further details of the particular algorithm used.
+
+  @par           Fast Versions
+                   Fast versions are supported for Q31 and Q15.  The fast versions take fewer cycles than the standard Q31 and Q15 correlate functions, but they require
+                   the input signals to be scaled down to avoid intermediate overflows.
+
+  @par           Opt Versions
+                   Opt versions are supported for Q15 and Q7.  They use an internal scratch buffer to enable better optimisation.
+                   These versions are optimised for cycles and consume more memory (scratch memory) than the standard Q15 and Q7 versions of correlate.
+ */
+
+/**
+  @addtogroup Corr
+  @{
+ */
+
+/**
+  @brief         Correlation of floating-point sequences.
+  @param[in]     pSrcA      points to the first input sequence
+  @param[in]     srcALen    length of the first input sequence
+  @param[in]     pSrcB      points to the second input sequence
+  @param[in]     srcBLen    length of the second input sequence
+  @param[out]    pDst       points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.
+  @return        none
+
+  @par           Details
+                   Only <code>srcALen + srcBLen - 1</code> output samples are computed and stored.
+                   The remaining <code>|srcALen - srcBLen|</code> entries of <code>pDst</code>
+                   (at the start of the buffer when srcALen > srcBLen, at the end otherwise)
+                   are not written, so the caller must zero <code>pDst</code> beforehand.
+                   If either length is zero the function returns without touching <code>pDst</code>.
+ */
+
+void arm_correlate_f64(
+  const float64_t * pSrcA,
+        uint32_t srcALen,
+  const float64_t * pSrcB,
+        uint32_t srcBLen,
+        float64_t * pDst)
+{
+  const float64_t *pIn1;                               /* Longer (or equal length) input sequence */
+  const float64_t *pIn2;                               /* Shorter input sequence, slid across pIn1 */
+        float64_t *pOut = pDst;                        /* Output pointer */
+  const float64_t *px;                                 /* Working pointer into pIn1 */
+  const float64_t *py;                                 /* Working pointer into pIn2 */
+  const float64_t *pSrc1;                              /* Start of the pIn1 window during stage 3 */
+        float64_t sum;                                 /* Accumulator */
+        uint32_t blockSize2;                           /* Number of outputs with full overlap */
+        uint32_t count, k;                             /* Loop counters */
+        uint32_t tmpLen;                               /* Temporary used to swap the lengths */
+        int32_t inc = 1;                               /* Destination address modifier */
+
+  /* Nothing can be correlated with an empty sequence. Without this guard
+   * (srcBLen - 1U) wraps around and the loops below run far past the buffers. */
+  if ((srcALen == 0U) || (srcBLen == 0U))
+  {
+    return;
+  }
+
+  /* The algorithm always slides the shorter sequence (pIn2) across the longer one (pIn1).
+   * CORR(x, y) is the reverse of CORR(y, x), so when srcBLen > srcALen the inputs are
+   * swapped and the output is written backwards (inc = -1).
+   * Instead of zero padding the shorter input, the |srcALen - srcBLen| output samples
+   * that would be zero are simply skipped; the caller provides them as zeros. */
+  if (srcALen >= srcBLen)
+  {
+    pIn1 = pSrcA;
+    pIn2 = pSrcB;
+
+    /* Skip the (srcALen - srcBLen) leading output samples that stay zero */
+    pOut += (srcALen - srcBLen);
+  }
+  else
+  {
+    pIn1 = pSrcB;
+    pIn2 = pSrcA;
+
+    /* From here on srcBLen is the length of the shorter sequence */
+    tmpLen = srcBLen;
+    srcBLen = srcALen;
+    srcALen = tmpLen;
+
+    /* Start at the last computed output sample and walk backwards */
+    pOut = pDst + ((srcALen + srcBLen) - 2U);
+    inc = -1;
+  }
+
+  /* The computation is split into three stages according to the number of
+   * multiply-accumulates (MACs) per output sample:
+   *   stage 1: partial overlap, MAC count grows from 1 to srcBLen - 1
+   *   stage 2: full overlap, srcBLen MACs per output
+   *   stage 3: partial overlap, MAC count shrinks from srcBLen - 1 to 1 */
+
+  /* ------------------------
+   * Stage 1
+   * ----------------------*/
+
+  /* sum = x[0] * y[srcBLen-1]
+   * sum = x[0] * y[srcBLen-2] + x[1] * y[srcBLen-1]
+   * ....
+   * sum = x[0] * y[1] + x[1] * y[2] +...+ x[srcBLen-2] * y[srcBLen-1]
+   */
+  for (count = 1U; count < srcBLen; count++)
+  {
+    /* The first count samples of pIn1 meet the last count samples of pIn2 */
+    px = pIn1;
+    py = pIn2 + (srcBLen - count);
+
+    sum = 0.0;
+
+    for (k = count; k > 0U; k--)
+    {
+      sum += *px++ * *py++;
+    }
+
+    /* Store the result and move the destination pointer by inc */
+    *pOut = sum;
+    pOut += inc;
+  }
+
+  /* ------------------------
+   * Stage 2
+   * ----------------------*/
+
+  /* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1]
+   * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen]   * y[srcBLen-1]
+   * ....
+   * sum = x[srcALen-srcBLen] * y[0] + x[srcALen-srcBLen+1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
+   */
+  blockSize2 = srcALen - (srcBLen - 1U);
+
+  for (count = 0U; count < blockSize2; count++)
+  {
+    /* The whole of pIn2 overlaps pIn1 starting at offset count */
+    px = pIn1 + count;
+    py = pIn2;
+
+    sum = 0.0;
+
+    for (k = srcBLen; k > 0U; k--)
+    {
+      sum += *px++ * *py++;
+    }
+
+    /* Store the result and move the destination pointer by inc */
+    *pOut = sum;
+    pOut += inc;
+  }
+
+  /* ------------------------
+   * Stage 3
+   * ----------------------*/
+
+  /* sum = x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-2]
+   * ....
+   * sum = x[srcALen-2] * y[0] + x[srcALen-1] * y[1]
+   * sum = x[srcALen-1] * y[0]
+   */
+  pSrc1 = pIn1 + (srcALen - (srcBLen - 1U));
+
+  for (count = srcBLen - 1U; count > 0U; count--)
+  {
+    /* The last count samples of pIn1 meet the first count samples of pIn2 */
+    px = pSrc1++;
+    py = pIn2;
+
+    sum = 0.0;
+
+    for (k = count; k > 0U; k--)
+    {
+      sum += *px++ * *py++;
+    }
+
+    /* Store the result and move the destination pointer by inc */
+    *pOut = sum;
+    pOut += inc;
+  }
+}
+
+/**
+  @} end of Corr group
+ */
+
+/**
+  @} end of Corr group
+ */
--- a/Source/FilteringFunctions/arm_fir_f64.c
+++ b/Source/FilteringFunctions/arm_fir_f64.c
@ -0,0 +1,231 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_fir_f64.c
+ * Description:  Floating-point FIR filter processing function
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/filtering_functions.h"
+
+/**
+  @ingroup groupFilters
+ */
+
+/**
+  @defgroup FIR Finite Impulse Response (FIR) Filters
+
+  This set of functions implements Finite Impulse Response (FIR) filters
+  for Q7, Q15, Q31, and floating-point data types.  Fast versions of Q15 and Q31 are also provided.
+  The functions operate on blocks of input and output data and each call to the function processes
+  <code>blockSize</code> samples through the filter.  <code>pSrc</code> and
+  <code>pDst</code> points to input and output arrays containing <code>blockSize</code> values.
+
+  @par           Algorithm
+                   The FIR filter algorithm is based upon a sequence of multiply-accumulate (MAC) operations.
+                   Each filter coefficient <code>b[n]</code> is multiplied by a state variable which equals a previous input sample <code>x[n]</code>.
+  <pre>
+      y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]
+  </pre>
+  @par
+                   \image html FIR.GIF "Finite Impulse Response filter"
+  @par
+                   <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.
+                   Coefficients are stored in time reversed order.
+  @par
+  <pre>
+      {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+  </pre>
+  @par
+                   <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.
+                   Samples in the state buffer are stored in the following order.
+  @par
+  <pre>
+      {x[n-numTaps+1], x[n-numTaps+2], x[n-numTaps+3], x[n-numTaps+4]....x[n](==pSrc[0]), x[n+1](==pSrc[1]), ..., x[n+blockSize-1](==pSrc[blockSize-1])}
+  </pre>
+  @par
+                   Note that the length of the state buffer exceeds the length of the coefficient array by <code>blockSize-1</code>.
+                   The increased state buffer length allows circular addressing, which is traditionally used in the FIR filters,
+                   to be avoided and yields a significant speed improvement.
+                   The state variables are updated after each block of data is processed; the coefficients are untouched.
+
+  @par           Instance Structure
+                   The coefficients and state variables for a filter are stored together in an instance data structure.
+                   A separate instance structure must be defined for each filter.
+                   Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
+                   There are separate instance structure declarations for each of the 4 supported data types.
+
+  @par           Initialization Functions
+                   There is also an associated initialization function for each data type.
+                   The initialization function performs the following operations:
+                   - Sets the values of the internal structure fields.
+                   - Zeros out the values in the state buffer.
+                   To do this manually without calling the init function, assign the following subfields of the instance structure:
+                   numTaps, pCoeffs, pState. Also set all of the values in pState to zero.
+  @par
+                   Use of the initialization function is optional.
+                   However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
+                   To place an instance structure into a const data section, the instance structure must be manually initialized.
+                   Set the values in the state buffer to zeros before static initialization.
+                   The code below statically initializes each of the 4 different data type filter instance structures
+  <pre>
+      arm_fir_instance_f32 S = {numTaps, pState, pCoeffs};
+      arm_fir_instance_q31 S = {numTaps, pState, pCoeffs};
+      arm_fir_instance_q15 S = {numTaps, pState, pCoeffs};
+      arm_fir_instance_q7 S =  {numTaps, pState, pCoeffs};
+  </pre>
+                   where <code>numTaps</code> is the number of filter coefficients in the filter; <code>pState</code> is the address of the state buffer;
+                   <code>pCoeffs</code> is the address of the coefficient buffer.
+  @par          Initialization of Helium version
+                 For Helium version the array of coefficients must be padded with zero to contain
+                 a full number of lanes.
+
+                 The array length L must be a multiple of x. L = x * a :
+                 - x is 4  for f32
+                 - x is 4  for q31
+                 - x is 4  for f16 (so managed like the f32 version and not like the q15 one)
+                 - x is 8  for q15
+                 - x is 16 for q7
+
+                 The additional coefficients 
+                 (x * a - numTaps) must be set to 0.
+                 numTaps is still set to its right value in the init function. It means that
+                 the implementation may require to read more coefficients due to the vectorization and
+                 to avoid having to manage too many different cases in the code.
+
+                
+  @par          Helium state buffer
+                 The state buffer must contain some additional temporary data
+                 used during the computation but which is not the state of the FIR.
+                 The first A samples are temporary data.
+                 The remaining samples are the state of the FIR filter.
+  @par                 
+                 So the state buffer has size <code> numTaps + A + blockSize - 1 </code> :
+                 - A is blockSize for f32
+                 - A is 8*ceil(blockSize/8) for f16
+                 - A is 8*ceil(blockSize/4) for q31
+                 - A is 0 for other datatypes (q15 and q7)
+
+
+  @par           Fixed-Point Behavior
+                   Care must be taken when using the fixed-point versions of the FIR filter functions.
+                   In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
+                   Refer to the function specific documentation below for usage guidelines.
+
+ */
+
+/**
+  @addtogroup FIR
+  @{
+ */
+
+/**
+  @brief         Processing function for floating-point FIR filter.
+  @param[in]     S          points to an instance of the floating-point FIR filter structure
+  @param[in]     pSrc       points to the block of input data
+  @param[out]    pDst       points to the block of output data
+  @param[in]     blockSize  number of samples to process
+  @return        none
+ */
+
+void arm_fir_f64(
+  const arm_fir_instance_f64 * S,
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  const float64_t *coeffs = S->pCoeffs;                /* Coefficient array (time reversed) */
+  const uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients */
+        float64_t *window = S->pState;                 /* Oldest sample used by the current output */
+        float64_t *writePtr;                           /* Where the next input sample is stored */
+        float64_t *stateStart;                         /* Start of the state buffer */
+        float64_t acc;                                 /* Accumulator */
+        uint32_t n, t;                                 /* Loop counters */
+
+  /* The state buffer starts with the (numTaps - 1) samples kept from the previous call;
+     new input samples are appended right after them */
+  writePtr = &(S->pState[(numTaps - 1U)]);
+
+  /* Produce one output sample per input sample */
+  for (n = 0U; n < blockSize; n++)
+  {
+    /* Append the new input sample to the state buffer */
+    *writePtr++ = *pSrc++;
+
+    /* Dot product of the current numTaps-sample window with the coefficients */
+    acc = 0.0;
+
+    for (t = 0U; t < numTaps; t++)
+    {
+      acc += window[t] * coeffs[t];
+    }
+
+    /* Store result in destination buffer */
+    *pDst++ = acc;
+
+    /* Slide the window by one sample for the next output */
+    window++;
+  }
+
+  /* Processing is complete.
+     Move the last (numTaps - 1) samples to the start of the state buffer
+     so they serve as history for the next function call. */
+  stateStart = S->pState;
+
+  for (t = 0U; t < (numTaps - 1U); t++)
+  {
+    stateStart[t] = window[t];
+  }
+
+}
+
+/**
+* @} end of FIR group
+*/
--- a/Source/FilteringFunctions/arm_fir_init_f64.c
+++ b/Source/FilteringFunctions/arm_fir_init_f64.c
@ -0,0 +1,95 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_fir_init_f64.c
+ * Description:  Floating-point FIR filter initialization function
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/filtering_functions.h"
+
+/**
+  @ingroup groupFilters
+ */
+
+/**
+  @addtogroup FIR
+  @{
+ */
+
+/**
+  @brief         Initialization function for the floating-point FIR filter.
+  @param[in,out] S          points to an instance of the floating-point FIR filter structure
+  @param[in] 	 numTaps    number of filter coefficients in the filter
+  @param[in]     pCoeffs    points to the filter coefficients buffer
+  @param[in]     pState     points to the state buffer
+  @param[in]     blockSize  number of samples processed per call
+  @return        none
+
+  @par           Details
+                   <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:
+  <pre>
+      {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+  </pre>
+  @par
+                   <code>pState</code> points to the array of state variables and some working memory for the Helium version.
+                   <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples (except for Helium - see below), where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_f64()</code>.
+  @par          Initialization of Helium version
+                 For Helium version the array of coefficients must be a multiple of 4 (4a) even if less
+                 then 4a coefficients are defined in the FIR. The additional coefficients 
+                 (4a - numTaps) must be set to 0.
+                 numTaps is still set to its right value in the init function. It means that
+                 the implementation may require to read more coefficients due to the vectorization and
+                 to avoid having to manage too many different cases in the code.
+
+  @par          Helium state buffer
+                 The state buffer must contain some additional temporary data
+                 used during the computation but which is not the state of the FIR.
+                 The first blockSize samples are temporary data.
+                 The remaining samples are the state of the FIR filter.
+                 So the state buffer has size <code> numTaps + 2 * blockSize - 1 </code>
+
+ */
+
+void arm_fir_init_f64(
+        arm_fir_instance_f64 * S,
+        uint16_t numTaps,
+  const float64_t * pCoeffs,
+        float64_t * pState,
+        uint32_t blockSize)
+{
+  /* Size in bytes of the (numTaps + blockSize - 1) samples of the state buffer */
+  const size_t stateBytes = (numTaps + (blockSize - 1U)) * sizeof(float64_t);
+
+  /* Record the filter configuration in the instance structure */
+  S->numTaps = numTaps;
+  S->pCoeffs = pCoeffs;
+  S->pState = pState;
+
+  /* Start from an all-zero filter history */
+  memset(pState, 0, stateBytes);
+}
+
+/**
+  @} end of FIR group
+ */
--- a/Source/StatisticsFunctions/StatisticsFunctions.c
+++ b/Source/StatisticsFunctions/StatisticsFunctions.c
@ -33,19 +33,24 @@
 #include "arm_logsumexp_dot_prod_f32.c"
 #include "arm_logsumexp_f32.c"
 #include "arm_max_f32.c"
+#include "arm_max_f64.c"
 #include "arm_max_q15.c"
 #include "arm_max_q31.c"
 #include "arm_max_q7.c"
 #include "arm_max_no_idx_f32.c"
+#include "arm_max_no_idx_f64.c"
 #include "arm_mean_f32.c"
+#include "arm_mean_f64.c"
 #include "arm_mean_q15.c"
 #include "arm_mean_q31.c"
 #include "arm_mean_q7.c"
 #include "arm_min_f32.c"
+#include "arm_min_f64.c"
 #include "arm_min_q15.c"
 #include "arm_min_q31.c"
 #include "arm_min_q7.c"
 #include "arm_power_f32.c"
+#include "arm_power_f64.c"
 #include "arm_power_q15.c"
 #include "arm_power_q31.c"
 #include "arm_power_q7.c"
@ -53,16 +58,20 @@
 #include "arm_rms_q15.c"
 #include "arm_rms_q31.c"
 #include "arm_std_f32.c"
+#include "arm_std_f64.c"
 #include "arm_std_q15.c"
 #include "arm_std_q31.c"
 #include "arm_var_f32.c"
+#include "arm_var_f64.c"
 #include "arm_var_q15.c"
 #include "arm_var_q31.c"
 #include "arm_absmax_f32.c"
+#include "arm_absmax_f64.c"
 #include "arm_absmax_q15.c"
 #include "arm_absmax_q31.c"
 #include "arm_absmax_q7.c"
 #include "arm_absmin_f32.c"
+#include "arm_absmin_f64.c"
 #include "arm_absmin_q15.c"
 #include "arm_absmin_q31.c"
 #include "arm_absmin_q7.c"
--- a/Source/StatisticsFunctions/arm_absmax_f64.c
+++ b/Source/StatisticsFunctions/arm_absmax_f64.c
@ -0,0 +1,100 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_f64.c
+ * Description:  Maximum value of absolute values of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup AbsMax Absolute Maximum
+
+  Computes the maximum value of absolute values of an array of data.
+  The function returns both the maximum value and its position within the array.
+  There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here
+  @return        none
+ */
+void arm_absmax_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex)
+{
+  float64_t best;          /* Largest magnitude found so far */
+  float64_t candidate;     /* Magnitude of the sample under test */
+  uint32_t bestPos = 0U;   /* Position of 'best'; sample 0 until beaten */
+  uint32_t pos = 1U;       /* Position of the sample under test */
+  uint32_t remaining;      /* Samples still to be examined */
+
+  /*
+   * Single forward pass over the vector, tracking the largest |x| seen
+   * and the position at which it occurred.
+   * The first sample is read before any length check, so blockSize is
+   * expected to be at least 1.
+   */
+
+  /* The first sample seeds the search */
+  best = fabs(*pSrc++);
+
+  /* Walk the remaining (blockSize - 1) samples */
+  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
+  {
+    candidate = fabs(*pSrc++);
+
+    /* Strict comparison: on equal magnitudes the earliest position is kept */
+    if (best < candidate)
+    {
+      best = candidate;
+      bestPos = pos;
+    }
+
+    pos++;
+  }
+
+  /* Hand the winning magnitude and its position back to the caller */
+  *pResult = best;
+  *pIndex = bestPos;
+}
+
+/**
+  @} end of AbsMax group
+ */
--- a/Source/StatisticsFunctions/arm_absmin_f64.c
+++ b/Source/StatisticsFunctions/arm_absmin_f64.c
@ -0,0 +1,98 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmin_f64.c
+ * Description:  Minimum value of absolute values of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup AbsMin Absolute Minimum
+
+  Computes the minimum value of absolute values of an array of data.
+  The function returns both the minimum value and its position within the array.
+  There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+  @addtogroup AbsMin
+  @{
+ */
+
+/**
+  @brief         Minimum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @param[out]    pIndex     index of minimum value returned here
+  @return        none
+ */
+void arm_absmin_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex)
+{
+  float64_t smallest;        /* Smallest magnitude found so far */
+  float64_t magnitude;       /* Magnitude of the sample under test */
+  uint32_t smallestPos = 0U; /* Position of 'smallest'; sample 0 until beaten */
+  uint32_t pos = 1U;         /* Position of the sample under test */
+  uint32_t remaining;        /* Samples still to be examined */
+
+  /*
+   * One forward pass, tracking the smallest |x| and where it occurred.
+   * The first sample is read before any length check (blockSize >= 1 expected).
+   */
+
+  /* The first sample seeds the search */
+  smallest = fabs(*pSrc++);
+
+  /* Walk the remaining (blockSize - 1) samples */
+  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
+  {
+    magnitude = fabs(*pSrc++);
+
+    /* Strict comparison: ties keep the earliest position */
+    if (smallest > magnitude)
+    {
+      smallest = magnitude;
+      smallestPos = pos;
+    }
+
+    pos++;
+  }
+
+  /* Hand the result and its position back to the caller */
+  *pResult = smallest;
+  *pIndex = smallestPos;
+}
+
+/**
+  @} end of AbsMin group
+ */
--- a/Source/StatisticsFunctions/arm_max_f64.c
+++ b/Source/StatisticsFunctions/arm_max_f64.c
@ -0,0 +1,98 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_max_f64.c
+ * Description:  Maximum value of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup Max Maximum
+
+  Computes the maximum value of an array of data.
+  The function returns both the maximum value and its position within the array.
+  There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+  @addtogroup Max
+  @{
+ */
+
+/**
+  @brief         Maximum value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here
+  @return        none
+ */
+void arm_max_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex)
+{
+  float64_t largest;         /* Largest value found so far */
+  float64_t sample;          /* Sample under test */
+  uint32_t largestPos = 0U;  /* Position of 'largest'; sample 0 until beaten */
+  uint32_t pos = 1U;         /* Position of the sample under test */
+  uint32_t remaining;        /* Samples still to be examined */
+
+  /*
+   * One forward pass, tracking the largest value and where it occurred.
+   * The first sample is read before any length check (blockSize >= 1 expected).
+   */
+
+  /* The first sample seeds the search */
+  largest = pSrc[0];
+
+  /* Walk the remaining (blockSize - 1) samples */
+  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
+  {
+    sample = pSrc[pos];
+
+    /* Strict comparison: ties keep the earliest position */
+    if (largest < sample)
+    {
+      largest = sample;
+      largestPos = pos;
+    }
+
+    pos++;
+  }
+
+  /* Hand the result and its position back to the caller */
+  *pResult = largest;
+  *pIndex = largestPos;
+}
+
+/**
+  @} end of Max group
+ */
--- a/Source/StatisticsFunctions/arm_max_no_idx_f64.c
+++ b/Source/StatisticsFunctions/arm_max_no_idx_f64.c
@ -0,0 +1,75 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_max_no_idx_f64.c
+ * Description:  Maximum value of a floating-point vector without returning the index
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup Max
+  @{
+ */
+
+/**
+  @brief         Maximum value of a floating-point vector, without its index.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @return        none
+ */
+void arm_max_no_idx_f64(
+    const float64_t *pSrc,
+    uint32_t   blockSize,
+    float64_t *pResult)
+{
+   /* Seed with F64_MIN, the double-precision counterpart of F32_MIN; a
+      single-precision seed would mask inputs that all lie below it. */
+   float64_t   maxValue = F64_MIN;
+   float64_t   newVal;
+
+   while (blockSize > 0U)
+   {
+       newVal = *pSrc++;
+       /* Keep the larger of the running maximum and the new sample */
+       if (maxValue < newVal)
+       {
+           maxValue = newVal;
+       }
+
+       blockSize--;
+   }
+
+   *pResult = maxValue;
+}
+
+/**
+  @} end of Max group
+ */
--- a/Source/StatisticsFunctions/arm_mean_f64.c
+++ b/Source/StatisticsFunctions/arm_mean_f64.c
@ -0,0 +1,75 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mean_f64.c
+ * Description:  Mean value of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup mean
+  @{
+ */
+
+/**
+  @brief         Mean value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector.
+  @param[in]     blockSize  number of samples in input vector.
+  @param[out]    pResult    mean value returned here.
+  @return        none
+ */
+void arm_mean_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult)
+{
+  float64_t acc = 0.0;     /* Running total of the samples */
+  uint32_t i;              /* Sample position */
+
+  /*
+   * Accumulate in input order:
+   *   acc = A[0] + A[1] + ... + A[blockSize-1]
+   */
+  for (i = 0U; i < blockSize; i++)
+  {
+    acc += pSrc[i];
+  }
+
+  /*
+   * mean = acc / blockSize
+   * The divisor is converted to float64_t before dividing.
+   * There is no guard for blockSize == 0: the quotient is then 0.0 / 0.0.
+   */
+  *pResult = acc / (float64_t) blockSize;
+}
+
+/**
+  @} end of mean group
+ */
--- a/Source/StatisticsFunctions/arm_min_f64.c
+++ b/Source/StatisticsFunctions/arm_min_f64.c
@ -0,0 +1,98 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_min_f64.c
+ * Description:  Minimum value of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup Min Minimum
+
+  Computes the minimum value of an array of data.
+  The function returns both the minimum value and its position within the array.
+  There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+  @addtogroup Min
+  @{
+ */
+
+/**
+  @brief         Minimum value of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    minimum value returned here
+  @param[out]    pIndex     index of minimum value returned here
+  @return        none
+ */
+void arm_min_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult,
+        uint32_t * pIndex)
+{
+  float64_t lowest;          /* Smallest value found so far */
+  float64_t sample;          /* Sample under test */
+  uint32_t lowestPos = 0U;   /* Position of 'lowest'; sample 0 until beaten */
+  uint32_t pos = 1U;         /* Position of the sample under test */
+  uint32_t remaining;        /* Samples still to be examined */
+
+  /*
+   * One forward pass, tracking the smallest value and where it occurred.
+   * The first sample is read before any length check (blockSize >= 1 expected).
+   */
+
+  /* The first sample seeds the search */
+  lowest = pSrc[0];
+
+  /* Walk the remaining (blockSize - 1) samples */
+  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
+  {
+    sample = pSrc[pos];
+
+    /* Strict comparison: ties keep the earliest position */
+    if (lowest > sample)
+    {
+      lowest = sample;
+      lowestPos = pos;
+    }
+
+    pos++;
+  }
+
+  /* Hand the result and its position back to the caller */
+  *pResult = lowest;
+  *pIndex = lowestPos;
+}
+
+/**
+  @} end of Min group
+ */
--- a/Source/StatisticsFunctions/arm_power_f64.c
+++ b/Source/StatisticsFunctions/arm_power_f64.c
@ -0,0 +1,94 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_power_f64.c
+ * Description:  Sum of the squares of the elements of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup power Power
+
+  Calculates the sum of the squares of the elements in the input vector.
+  The following algorithm is used:
+
+  <pre>
+      Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];
+  </pre>
+
+  There are separate functions for floating point, Q31, Q15, and Q7 data types.
+
+  Since the result is not divided by the length, those functions are in fact computing
+  something which is more an energy than a power.
+
+ */
+
+/**
+  @addtogroup power
+  @{
+ */
+
+/**
+  @brief         Sum of the squares of the elements of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    sum of the squares value returned here
+  @return        none
+ */
+void arm_power_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult)
+{
+  float64_t energy = 0.0;  /* Running sum of squares */
+  float64_t sample;        /* Current input value */
+  uint32_t n;              /* Sample position */
+
+  /*
+   * energy = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]
+   *
+   * The sum is not divided by blockSize, so the result is a sum of
+   * squares (an energy) rather than a mean power.
+   */
+  for (n = 0U; n < blockSize; n++)
+  {
+    /* Read the sample once, then add its square */
+    sample = pSrc[n];
+    energy += sample * sample;
+  }
+
+  /*
+   * Store the total; an empty vector leaves the initial 0.0 in place.
+   */
+  *pResult = energy;
+}
+
+/**
+  @} end of power group
+ */
--- a/Source/StatisticsFunctions/arm_std_f64.c
+++ b/Source/StatisticsFunctions/arm_std_f64.c
@ -0,0 +1,83 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_std_f64.c
+ * Description:  Standard deviation of the elements of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup STD Standard deviation
+
+  Calculates the standard deviation of the elements in the input vector.
+
+  The floating-point implementations rely on the matching variance function (arm_var_f64 here),
+  which uses a two-pass algorithm to avoid numerical instability and cancellation errors.
+
+  Fixed point versions are using the standard textbook algorithm since the fixed point
+  numerical behavior is different from the float one.
+
+  Algorithm for fixed point versions is summarized below:
+
+
+  <pre>
+      Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))
+
+      sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
+      sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
+  </pre>
+
+  There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+  @addtogroup STD
+  @{
+ */
+
+/**
+  @brief         Standard deviation of the elements of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    standard deviation value returned here
+  @return        none
+ */
+void arm_std_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult)
+{
+  float64_t variance;                        /* Filled in by arm_var_f64() */
+  arm_var_f64(pSrc, blockSize, &variance);
+  *pResult = sqrt(variance);                 /* Standard deviation = sqrt(variance) */
+}
+
+/**
+  @} end of STD group
+ */
--- a/Source/StatisticsFunctions/arm_var_f64.c
+++ b/Source/StatisticsFunctions/arm_var_f64.c
@ -0,0 +1,115 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_var_f64.c
+ * Description:  Variance of the elements of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions.h"
+
+/**
+  @ingroup groupStats
+ */
+
+/**
+  @defgroup variance  Variance
+
+  Calculates the variance of the elements in the input vector.
+  The underlying algorithm used is the direct method sometimes referred to as the two-pass method:
+
+  <pre>
+      Result = sum((element - meanOfElements)^2) / (numElement - 1)
+
+      meanOfElements = ( pSrc[0] + pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize
+  </pre>
+
+  There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+  @addtogroup variance
+  @{
+ */
+
+/**
+  @brief         Variance of the elements of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    variance value returned here
+  @return        none
+
+  @par           Details
+                   Two-pass computation: the mean is evaluated first, then the
+                   squared deviations from it are summed and divided by
+                   (blockSize - 1). A vector of fewer than two samples yields 0.
+ */
+void arm_var_f64(
+  const float64_t * pSrc,
+        uint32_t blockSize,
+        float64_t * pResult)
+{
+        uint32_t blkCnt;                               /* Loop counter */
+        float64_t sum = 0.0;                           /* Sum of the samples */
+        float64_t fSum = 0.0;                          /* Sum of squared deviations */
+        float64_t fMean, fValue;                       /* Mean, and one deviation from it */
+  const float64_t * pInput = pSrc;
+
+  /* The (blockSize - 1) divisor needs at least two samples */
+  if (blockSize <= 1U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  /* Pass 1: sum = A[0] + A[1] + ... + A[blockSize-1] */
+  blkCnt = blockSize;
+  while (blkCnt > 0U)
+  {
+    sum += *pInput++;
+    blkCnt--;
+  }
+
+  /* mean = sum / blockSize */
+  fMean = sum / (float64_t) blockSize;
+
+  /* Pass 2: accumulate (A[n] - mean)^2 from the start of the vector */
+  pInput = pSrc;
+  blkCnt = blockSize;
+  while (blkCnt > 0U)
+  {
+    fValue = *pInput++ - fMean;
+    fSum += fValue * fValue;
+    blkCnt--;
+  }
+
+  /* Variance. The divisor is formed in unsigned integer arithmetic and
+     converted to float64_t afterwards; going through a single-precision
+     constant such as 1.0f would round sample counts above 2^24 before
+     the division. */
+  *pResult = fSum / (float64_t) (blockSize - 1U);
+}
+
+/**
+  @} end of variance group
+ */
--- a/Source/SupportFunctions/SupportFunctions.c
+++ b/Source/SupportFunctions/SupportFunctions.c
@ -30,10 +30,12 @@
 #include "arm_bitonic_sort_f32.c"
 #include "arm_bubble_sort_f32.c"
 #include "arm_copy_f32.c"
+#include "arm_copy_f64.c"
 #include "arm_copy_q15.c"
 #include "arm_copy_q31.c"
 #include "arm_copy_q7.c"
 #include "arm_fill_f32.c"
+#include "arm_fill_f64.c"
 #include "arm_fill_q15.c"
 #include "arm_fill_q31.c"
 #include "arm_fill_q7.c"
--- a/Source/SupportFunctions/arm_copy_f64.c
+++ b/Source/SupportFunctions/arm_copy_f64.c
@ -0,0 +1,83 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_copy_f64.c
+ * Description:  Copies the elements of a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/support_functions.h"
+
+/**
+  @ingroup groupSupport
+ */
+
+/**
+  @defgroup copy Vector Copy
+
+  Copies sample by sample from source vector to destination vector.
+
+  <pre>
+      pDst[n] = pSrc[n];   0 <= n < blockSize.
+  </pre>
+
+  There are separate functions for floating point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+  @addtogroup copy
+  @{
+ */
+
+/**
+  @brief         Copies the elements of a floating-point vector.
+  @param[in]     pSrc       points to input vector
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+void arm_copy_f64(
+  const float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize)
+{
+  uint32_t n;                                    /* Sample position */
+
+  /*
+   * pDst[n] = pSrc[n] for 0 <= n < blockSize.
+   *
+   * Elements are transferred one at a time, lowest position first.
+   * Nothing is written when blockSize is zero.
+   *
+   * The source is only read; the destination is only written.
+   */
+  for (n = 0U; n < blockSize; n++)
+  {
+    /* C = A */
+    pDst[n] = pSrc[n];
+  }
+}
+
+/**
+  @} end of copy group
+ */
--- a/Source/SupportFunctions/arm_fill_f64.c
+++ b/Source/SupportFunctions/arm_fill_f64.c
@ -0,0 +1,83 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_fill_f64.c
+ * Description:  Fills a constant value into a floating-point vector
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/support_functions.h"
+
+/**
+  @ingroup groupSupport
+ */
+
+/**
+  @defgroup Fill Vector Fill
+
+  Fills the destination vector with a constant value.
+
+  <pre>
+      pDst[n] = value;   0 <= n < blockSize.
+  </pre>
+
+  There are separate functions for floating point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+  @addtogroup Fill
+  @{
+ */
+
+/**
+  @brief         Fills a constant value into a floating-point vector.
+  @param[in]     value      input value to be filled
+  @param[out]    pDst       points to output vector
+  @param[in]     blockSize  number of samples in each vector
+  @return        none
+ */
+void arm_fill_f64(
+  float64_t value,
+  float64_t * pDst,
+  uint32_t blockSize)
+{
+  /*
+   * pDst[n] = value for 0 <= n < blockSize.
+   *
+   * Every element receives the same value, lowest position first.
+   * Nothing is written when blockSize is zero.
+   */
+
+  /* The by-value blockSize parameter doubles as the countdown */
+  while (blockSize != 0U)
+  {
+    /* C = value */
+    *pDst = value;
+    pDst++;
+    blockSize--;
+  }
+}
+
+/**
+  @} end of Fill group
+ */