diff --git a/Source/ControllerFunctions/arm_sin_cos_f32.c b/Source/ControllerFunctions/arm_sin_cos_f32.c index 37d63a8b..d00db1db 100644 --- a/Source/ControllerFunctions/arm_sin_cos_f32.c +++ b/Source/ControllerFunctions/arm_sin_cos_f32.c @@ -1,24 +1,24 @@ -/* ---------------------------------------------------------------------- -* Copyright (C) 2010-2014 ARM Limited. All rights reserved. -* -* $Date: 19. March 2015 -* $Revision: V.1.4.5 -* -* Project: CMSIS DSP Library -* Title: arm_sin_cos_f32.c -* -* Description: Sine and Cosine calculation for floating-point values. -* +/* ---------------------------------------------------------------------- +* Copyright (C) 2010-2014 ARM Limited. All rights reserved. +* +* $Date: 22. December 2016 +* $Revision: V.1.4.5 a +* +* Project: CMSIS DSP Library +* Title: arm_sin_cos_f32.c +* +* Description: Sine and Cosine calculation for floating-point values. +* * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 -* -* Redistribution and use in source and binary forms, with or without +* +* Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in -* the documentation and/or other materials provided with the +* the documentation and/or other materials provided with the * distribution. * - Neither the name of ARM LIMITED nor the names of its contributors * may be used to endorse or promote products derived from this @@ -27,7 +27,7 @@ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; @@ -35,115 +35,120 @@ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -* POSSIBILITY OF SUCH DAMAGE. +* POSSIBILITY OF SUCH DAMAGE. * -------------------------------------------------------------------- */ #include "arm_math.h" #include "arm_common_tables.h" -/** - * @ingroup groupController +/** + * @ingroup groupController */ -/** - * @defgroup SinCos Sine Cosine - * - * Computes the trigonometric sine and cosine values using a combination of table lookup - * and linear interpolation. - * There are separate functions for Q31 and floating-point data types. - * The input to the floating-point version is in degrees while the - * fixed-point Q31 have a scaled input with the range - * [-1 0.9999] mapping to [-180 +180] degrees. +/** + * @defgroup SinCos Sine Cosine + * + * Computes the trigonometric sine and cosine values using a combination of table lookup + * and linear interpolation. + * There are separate functions for Q31 and floating-point data types. + * The input to the floating-point version is in degrees while the + * fixed-point Q31 have a scaled input with the range + * [-1 0.9999] mapping to [-180 +180] degrees. * * The floating point function also allows values that are out of the usual range. When this happens, the function will * take extra time to adjust the input value to the range of [-180 180]. - * - * The implementation is based on table lookup using 360 values together with linear interpolation. - * The steps used are: - * -# Calculation of the nearest integer table index. 
- * -# Compute the fractional portion (fract) of the input. - * -# Fetch the value corresponding to \c index from sine table to \c y0 and also value from \c index+1 to \c y1. - * -# Sine value is computed as *psinVal = y0 + (fract * (y1 - y0)). - * -# Fetch the value corresponding to \c index from cosine table to \c y0 and also value from \c index+1 to \c y1. - * -# Cosine value is computed as *pcosVal = y0 + (fract * (y1 - y0)). + * + * The implementation is based on table lookup using 360 values together with linear interpolation. + * The steps used are: + * -# Calculation of the nearest integer table index. + * -# Compute the fractional portion (fract) of the input. + * -# Fetch the value corresponding to \c index from sine table to \c y0 and also value from \c index+1 to \c y1. + * -# Sine value is computed as *psinVal = y0 + (fract * (y1 - y0)). + * -# Fetch the value corresponding to \c index from cosine table to \c y0 and also value from \c index+1 to \c y1. + * -# Cosine value is computed as *pcosVal = y0 + (fract * (y1 - y0)). */ - /** - * @addtogroup SinCos - * @{ + /** + * @addtogroup SinCos + * @{ */ -/** - * @brief Floating-point sin_cos function. - * @param[in] theta input value in degrees - * @param[out] *pSinVal points to the processed sine output. - * @param[out] *pCosVal points to the processed cos output. - * @return none. +/** + * @brief Floating-point sin_cos function. + * @param[in] theta input value in degrees + * @param[out] *pSinVal points to the processed sine output. + * @param[out] *pCosVal points to the processed cos output. + * @return none. 
*/ void arm_sin_cos_f32( - float32_t theta, - float32_t * pSinVal, - float32_t * pCosVal) + float32_t theta, + float32_t * pSinVal, + float32_t * pCosVal) { - float32_t fract, in; /* Temporary variables for input, output */ - uint16_t indexS, indexC; /* Index variable */ - float32_t f1, f2, d1, d2; /* Two nearest output values */ - int32_t n; - float32_t findex, Dn, Df, temp; - - /* input x is in degrees */ - /* Scale the input, divide input by 360, for cosine add 0.25 (pi/2) to read sine table */ - in = theta * 0.00277777777778f; - - /* Calculation of floor value of input */ - n = (int32_t) in; - - /* Make negative values towards -infinity */ - if(in < 0.0f) - { - n--; - } - /* Map input value to [0 1] */ - in = in - (float32_t) n; - - /* Calculation of index of the table */ - findex = (float32_t) FAST_MATH_TABLE_SIZE * in; - indexS = ((uint16_t)findex) & 0x1ff; - indexC = (indexS + (FAST_MATH_TABLE_SIZE / 4)) & 0x1ff; - - /* fractional value calculation */ - fract = findex - (float32_t) indexS; - - /* Read two nearest values of input value from the cos & sin tables */ - f1 = sinTable_f32[indexC+0]; - f2 = sinTable_f32[indexC+1]; - d1 = -sinTable_f32[indexS+0]; - d2 = -sinTable_f32[indexS+1]; - - Dn = 0.0122718463030f; // delta between the two points (fixed), in this case 2*pi/FAST_MATH_TABLE_SIZE - Df = f2 - f1; // delta between the values of the functions - temp = Dn*(d1 + d2) - 2*Df; - temp = fract*temp + (3*Df - (d2 + 2*d1)*Dn); - temp = fract*temp + d1*Dn; - - /* Calculation of cosine value */ - *pCosVal = fract*temp + f1; - - /* Read two nearest values of input value from the cos & sin tables */ - f1 = sinTable_f32[indexS+0]; - f2 = sinTable_f32[indexS+1]; - d1 = sinTable_f32[indexC+0]; - d2 = sinTable_f32[indexC+1]; - - Df = f2 - f1; // delta between the values of the functions - temp = Dn*(d1 + d2) - 2*Df; - temp = fract*temp + (3*Df - (d2 + 2*d1)*Dn); - temp = fract*temp + d1*Dn; - - /* Calculation of sine value */ - *pSinVal = fract*temp + f1; + 
float32_t fract, in; /* Temporary variables for input, output */ + uint16_t indexS, indexC; /* Index variable */ + float32_t f1, f2, d1, d2; /* Two nearest output values */ + float32_t findex, Dn, Df, temp; + + /* input x is in degrees */ + /* Scale the input, divide input by 360, for cosine add 0.25 (pi/2) to read sine table */ + in = theta * 0.00277777777778f; + + if (in < 0.0f) + { + in = -in; + } + + in = in - (int32_t)in; + + /* Calculation of index of the table */ + findex = (float32_t) FAST_MATH_TABLE_SIZE * in; + indexS = ((uint16_t)findex) & 0x1ff; + indexC = (indexS + (FAST_MATH_TABLE_SIZE / 4)) & 0x1ff; + + /* fractional value calculation */ + fract = findex - (float32_t) indexS; + + /* Read two nearest values of input value from the cos & sin tables */ + f1 = sinTable_f32[indexC+0]; + f2 = sinTable_f32[indexC+1]; + d1 = -sinTable_f32[indexS+0]; + d2 = -sinTable_f32[indexS+1]; + + temp = (1.0f - fract) * f1 + fract * f2; + + Dn = 0.0122718463030f; // delta between the two points (fixed), in this case 2*pi/FAST_MATH_TABLE_SIZE + Df = f2 - f1; // delta between the values of the functions + + temp = Dn *(d1 + d2) - 2 * Df; + temp = fract * temp + (3 * Df - (d2 + 2 * d1) * Dn); + temp = fract * temp + d1 * Dn; + + /* Calculation of cosine value */ + *pCosVal = fract * temp + f1; + + /* Read two nearest values of input value from the cos & sin tables */ + f1 = sinTable_f32[indexS+0]; + f2 = sinTable_f32[indexS+1]; + d1 = sinTable_f32[indexC+0]; + d2 = sinTable_f32[indexC+1]; + + temp = (1.0f - fract) * f1 + fract * f2; + + Df = f2 - f1; // delta between the values of the functions + temp = Dn*(d1 + d2) - 2*Df; + temp = fract*temp + (3*Df - (d2 + 2*d1)*Dn); + temp = fract*temp + d1*Dn; + + /* Calculation of sine value */ + *pSinVal = fract*temp + f1; + + if (theta < 0.0f) + { + *pSinVal = -*pSinVal; + } } -/** - * @} end of SinCos group +/** + * @} end of SinCos group */ diff --git a/Source/StatisticsFunctions/arm_var_f32.c 
b/Source/StatisticsFunctions/arm_var_f32.c index 06491a95..ada6c3f9 100644 --- a/Source/StatisticsFunctions/arm_var_f32.c +++ b/Source/StatisticsFunctions/arm_var_f32.c @@ -1,8 +1,8 @@ /* ---------------------------------------------------------------------- * Copyright (C) 2010-2014 ARM Limited. All rights reserved. * -* $Date: 19. March 2015 -* $Revision: V.1.4.5 +* $Date: 22. December 2016 +* $Revision: V.1.4.5 a * * Project: CMSIS DSP Library * Title: arm_var_f32.c @@ -48,14 +48,13 @@ * @defgroup variance Variance * * Calculates the variance of the elements in the input vector. - * The underlying algorithm is used: + * The underlying algorithm used is the direct method sometimes referred to as the two-pass method: * *
- *   Result = (sumOfSquares - sum2 / blockSize) / (blockSize - 1)
+ *   Result = sum((element - meanOfElements)^2) / (blockSize - 1)
  *
- *     where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
+ *     where, meanOfElements = ( pSrc[0] + pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize
  *
- *                     sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
  * 
* * There are separate functions for floating point, Q31, and Q15 data types. @@ -76,116 +75,117 @@ */ void arm_var_f32( - float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) { - float32_t sum = 0.0f; /* Temporary result storage */ - float32_t sumOfSquares = 0.0f; /* Sum of squares */ - float32_t in; /* input value */ - uint32_t blkCnt; /* loop counter */ -#ifndef ARM_MATH_CM0_FAMILY - float32_t meanOfSquares, mean, squareOfMean; /* Temporary variables */ -#else - float32_t squareOfSum; /* Square of Sum */ -#endif - - if(blockSize == 1u) - { - *pResult = 0; - return; - } - -#ifndef ARM_MATH_CM0_FAMILY - - /*loop Unrolling */ - blkCnt = blockSize >> 2u; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - while(blkCnt > 0u) - { - /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ - /* Compute Sum of squares of the input samples - * and then store the result in a temporary variable, sum. */ - in = *pSrc++; - sum += in; - sumOfSquares += in * in; - in = *pSrc++; - sum += in; - sumOfSquares += in * in; - in = *pSrc++; - sum += in; - sumOfSquares += in * in; - in = *pSrc++; - sum += in; - sumOfSquares += in * in; - - /* Decrement the loop counter */ - blkCnt--; - } - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - blkCnt = blockSize % 0x4u; - - while(blkCnt > 0u) - { - /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ - /* Compute Sum of squares of the input samples - * and then store the result in a temporary variable, sum. */ - in = *pSrc++; - sum += in; - sumOfSquares += in * in; - - /* Decrement the loop counter */ - blkCnt--; - } - - /* Compute Mean of squares of the input samples - * and then store the result in a temporary variable, meanOfSquares. 
*/ - meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f); - - /* Compute mean of all input values */ - mean = sum / (float32_t) blockSize; - - /* Compute square of mean */ - squareOfMean = (mean * mean) * (((float32_t) blockSize) / - ((float32_t) blockSize - 1.0f)); - - /* Compute variance and then store the result to the destination */ - *pResult = meanOfSquares - squareOfMean; - -#else - /* Run the below code for Cortex-M0 */ - - /* Loop over blockSize number of values */ - blkCnt = blockSize; - - while(blkCnt > 0u) - { - /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ - /* Compute Sum of squares of the input samples - * and then store the result in a temporary variable, sumOfSquares. */ - in = *pSrc++; - sumOfSquares += in * in; - - /* C = (A[0] + A[1] + ... + A[blockSize-1]) */ - /* Compute Sum of the input samples - * and then store the result in a temporary variable, sum. */ - sum += in; - - /* Decrement the loop counter */ - blkCnt--; - } - - /* Compute the square of sum */ - squareOfSum = ((sum * sum) / (float32_t) blockSize); - - /* Compute the variance */ - *pResult = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f)); - -#endif /* #ifndef ARM_MATH_CM0_FAMILY */ + float32_t fMean, fValue; + uint32_t blkCnt; /* loop counter */ + float32_t * pInput = pSrc; + float32_t sum = 0.0f; + float32_t fSum = 0.0f; + #if !defined(ARM_MATH_CM0_FAMILY) && !defined(ARM_MATH_CM3_FAMILY) + float32_t in1, in2, in3, in4; + #endif + + if (blockSize <= 1u) + { + *pResult = 0; + return; + } + + #if !defined(ARM_MATH_CM0_FAMILY) && !defined(ARM_MATH_CM3_FAMILY) + /* Run the below code for Cortex-M4 and Cortex-M7 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0u) + { + /* C = (A[0] + A[1] + A[2] + ... 
+ A[blockSize-1]) */ + in1 = *pInput++; + in2 = *pInput++; + in3 = *pInput++; + in4 = *pInput++; + + sum += in1; + sum += in2; + sum += in3; + sum += in4; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + + #else + /* Run the below code for Cortex-M0 or Cortex-M3 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + #endif + + while (blkCnt > 0u) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pInput++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + fMean = sum / (float32_t) blockSize; + + pInput = pSrc; + + #if !defined(ARM_MATH_CM0_FAMILY) && !defined(ARM_MATH_CM3_FAMILY) + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0u) + { + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + + /* Decrement the loop counter */ + blkCnt--; + } + + blkCnt = blockSize % 0x4u; + #else + /* Run the below code for Cortex-M0 or Cortex-M3 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + #endif + + while (blkCnt > 0u) + { + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Variance */ + *pResult = fSum / (float32_t)(blockSize - 1.0f); } /**