CMSIS-DSP: Improvement for issue 809

6 years ago · efd47b9da6
parent d2fb32de54
commit efd47b9da6
3 changed files with 57 additions and 114 deletions
--- a/Source/StatisticsFunctions/arm_std_f32.c
+++ b/Source/StatisticsFunctions/arm_std_f32.c
@ -36,7 +36,15 @@
  @defgroup STD Standard deviation

  Calculates the standard deviation of the elements in the input vector.
-  The underlying algorithm is used:
+
+  The float implementation is relying on arm_var_f32 which is using a two-pass algorithm
+  to avoid problem of numerical instabilities and cancellation errors.
+
+  Fixed point versions are using the standard textbook algorithm since the fixed point
+  numerical behavior is different from the float one.
+
+  Algorithm for fixed point versions is summarized below:
+

  <pre>
      Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))
@ -60,7 +68,6 @@
  @param[out]    pResult    standard deviation value returned here
  @return        none
 */
-#if (defined(ARM_MATH_NEON_EXPERIMENTAL) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
 void arm_std_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
@ -70,118 +77,6 @@ void arm_std_f32(
  arm_var_f32(pSrc,blockSize,&var);
  arm_sqrt_f32(var, pResult);
 }
-#else
-void arm_std_f32(
-  const float32_t * pSrc,
-        uint32_t blockSize,
-        float32_t * pResult)
-{
-        uint32_t blkCnt;                               /* Loop counter */
-        float32_t sum = 0.0f;                          /* Temporary result storage */
-        float32_t sumOfSquares = 0.0f;                 /* Sum of squares */
-        float32_t in;                                  /* Temporary variable to store input value */
-
-#ifndef ARM_MATH_CM0_FAMILY
-        float32_t meanOfSquares, mean, squareOfMean;   /* Temporary variables */
-#else
-        float32_t squareOfSum;                         /* Square of Sum */
-        float32_t var;                                 /* Temporary varaince storage */
-#endif
-
-  if (blockSize <= 1U)
-  {
-    *pResult = 0;
-    return;
-  }
-
-#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-  /* Loop unrolling: Compute 4 outputs at a time */
-  blkCnt = blockSize >> 2U;
-
-  while (blkCnt > 0U)
-  {
-    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-    /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
-    in = *pSrc++;
-    /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
-    sumOfSquares += in * in;
-    /* Compute sum and store result in a temporary variable, sum. */
-    sum += in;
-
-    in = *pSrc++;
-    sumOfSquares += in * in;
-    sum += in;
-
-    in = *pSrc++;
-    sumOfSquares += in * in;
-    sum += in;
-
-    in = *pSrc++;
-    sumOfSquares += in * in;
-    sum += in;
-
-    /* Decrement loop counter */
-    blkCnt--;
-  }
-
-  /* Loop unrolling: Compute remaining outputs */
-  blkCnt = blockSize % 0x4U;
-
-#else
-
-  /* Initialize blkCnt with number of samples */
-  blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
-  while (blkCnt > 0U)
-  {
-    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-    /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
-    in = *pSrc++;
-    /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
-    sumOfSquares += ( in * in);
-    /* Compute sum and store result in a temporary variable, sum. */
-    sum += in;
-
-    /* Decrement loop counter */
-    blkCnt--;
-  }
-
-#ifndef ARM_MATH_CM0_FAMILY
-
-  /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
-  meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f);
-
-  /* Compute mean of all input values */
-  mean = sum / (float32_t) blockSize;
-
-  /* Compute square of mean */
-  squareOfMean = (mean * mean) * (((float32_t) blockSize) /
-                                  ((float32_t) blockSize - 1.0f));
-
-  /* Compute standard deviation and store result to destination */
-  arm_sqrt_f32((meanOfSquares - squareOfMean), pResult);
-
-#else
-  /* Run the below code for Cortex-M0 */
-
-  /* Compute square of sum */
-  squareOfSum = ((sum * sum) / (float32_t) blockSize);
-
-  /* Compute variance */
-  var = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
-
-  /* Compute standard deviation and store result in destination */
-  arm_sqrt_f32(var, pResult);
-
-#endif /* #ifndef ARM_MATH_CM0_FAMILY */
-
-}
-#endif /* #if defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)*/

 /**
  @} end of STD group
--- a/Testing/Source/Tests/StatsTestsF32.cpp
+++ b/Testing/Source/Tests/StatsTestsF32.cpp
@ -196,6 +196,52 @@ a double precision computation.

    }

+
+    void StatsTestsF32::test_std_stability_f32()
+    {
+      /*
+
+      With the textbook algorithm, those values will produce a negative
+      value for the variance.
+
+      The CMSIS-DSP variance algorithm is the two pass one so will work
+      with those values.
+
+      So, it should be possible to compute the square root for the standard
+      deviation.
+
+      */
+      float32_t in[4]={4.0f, 7.0f, 13.0f, 16.0f};
+      float32_t result;
+      int i;
+      arm_status status;
+
+      /*
+
+      Add bigger offset so that average is much bigger than standard deviation.
+
+      */
+      for(i=0 ; i < 4; i++)
+      {
+        in[i] += 3e4;
+      }
+
+      arm_std_f32(in,4,&result);
+
+      /*
+
+      If variance is giving a negative value, the square root
+      should return zero.
+
+      We check it is not happening here.
+
+
+      */
+
+      ASSERT_TRUE(fabs(5.47723f - result) < 1.0e-4);
+
+    }
+
    void StatsTestsF32::test_entropy_f32()
    {
      const float32_t *inp  = inputA.ptr();
--- a/Testing/desc.txt
+++ b/Testing/desc.txt
@ -89,6 +89,8 @@ group Root {
                Test long  arm_rms_f32:test_rms_f32
                Test long  arm_std_f32:test_std_f32
                Test long  arm_var_f32:test_var_f32
+
+                Test stability  arm_std_f32:test_std_stability_f32
              }

           }