From 55c9be8af0bed102a468765edb803d4ee1ee09d8 Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Tue, 11 Aug 2020 11:19:25 +0200 Subject: [PATCH] CMSIS-DSP: Added additional f16 statistics functions and the required f16 fast math functions. --- Include/arm_common_tables_f16.h | 8 + Include/arm_math_types_f16.h | 2 +- Include/arm_vec_math_f16.h | 232 ++++++++ Include/dsp/basic_math_functions_f16.h | 1 - Include/dsp/fast_math_functions_f16.h | 38 ++ Include/dsp/statistics_functions_f16.h | 59 ++ Source/CommonTables/arm_common_tables_f16.c | 26 + Source/FastMathFunctions/CMakeLists.txt | 6 + .../FastMathFunctions/FastMathFunctionsF16.c | 31 ++ Source/FastMathFunctions/arm_vexp_f16.c | 84 +++ Source/FastMathFunctions/arm_vinverse_f16.c | 81 +++ Source/FastMathFunctions/arm_vlog_f16.c | 83 +++ Source/SVMFunctions/arm_svm_linear_init_f32.c | 13 +- .../SVMFunctions/arm_svm_linear_predict_f32.c | 4 +- .../arm_svm_polynomial_init_f32.c | 14 +- .../arm_svm_polynomial_predict_f32.c | 4 +- Source/SVMFunctions/arm_svm_rbf_init_f32.c | 14 +- Source/SVMFunctions/arm_svm_rbf_predict_f32.c | 4 +- .../SVMFunctions/arm_svm_sigmoid_init_f32.c | 13 +- .../arm_svm_sigmoid_predict_f32.c | 4 +- Source/StatisticsFunctions/CMakeLists.txt | 5 + .../StatisticsFunctionsF16.c | 5 + Source/StatisticsFunctions/arm_entropy_f16.c | 138 +++++ Source/StatisticsFunctions/arm_entropy_f32.c | 4 +- Source/StatisticsFunctions/arm_entropy_f64.c | 4 +- .../arm_kullback_leibler_f16.c | 150 +++++ .../arm_kullback_leibler_f32.c | 4 +- .../arm_kullback_leibler_f64.c | 4 +- .../arm_logsumexp_dot_prod_f16.c | 82 +++ .../arm_logsumexp_dot_prod_f32.c | 4 +- .../StatisticsFunctions/arm_logsumexp_f16.c | 170 ++++++ .../StatisticsFunctions/arm_logsumexp_f32.c | 4 +- .../StatisticsFunctions/arm_max_no_idx_f16.c | 144 +++++ Source/StatisticsFunctions/arm_var_f16.c | 4 +- Source/SupportFunctions/arm_barycenter_f16.c | 14 +- Source/SupportFunctions/arm_barycenter_f32.c | 4 +- 
.../SupportFunctions/arm_weighted_sum_f16.c | 14 +- .../SupportFunctions/arm_weighted_sum_f32.c | 4 +- Testing/CMakeLists.txt | 1 + Testing/Include/Tests/FastMathF16.h | 23 + Testing/PatternGeneration/FastMath.py | 16 +- Testing/PatternGeneration/Stats.py | 8 + .../DSP/FastMath/FastMathF16/Angles1_f16.txt | 48 ++ .../DSP/FastMath/FastMathF16/Cos1_f16.txt | 48 ++ .../DSP/FastMath/FastMathF16/Exp1_f16.txt | 106 ++++ .../FastMath/FastMathF16/ExpInput1_f16.txt | 106 ++++ .../DSP/FastMath/FastMathF16/Inverse1_f16.txt | 514 ++++++++++++++++++ .../DSP/FastMath/FastMathF16/Log1_f16.txt | 52 ++ .../FastMath/FastMathF16/LogInput1_f16.txt | 52 ++ .../DSP/FastMath/FastMathF16/Samples1_f16.txt | 514 ++++++++++++++++++ .../DSP/FastMath/FastMathF16/Sin1_f16.txt | 48 ++ .../DSP/FastMath/FastMathF16/Sqrt1_f16.txt | 18 + .../FastMath/FastMathF16/SqrtInput1_f16.txt | 18 + .../DSP/Stats/StatsF16/Dims22_s16.txt | 24 + .../DSP/Stats/StatsF16/Dims23_s16.txt | 24 + .../DSP/Stats/StatsF16/Dims24_s16.txt | 24 + .../DSP/Stats/StatsF16/Dims25_s16.txt | 24 + .../DSP/Stats/StatsF16/Input22_f16.txt | 152 ++++++ .../DSP/Stats/StatsF16/Input23_f16.txt | 152 ++++++ .../DSP/Stats/StatsF16/InputA24_f16.txt | 152 ++++++ .../DSP/Stats/StatsF16/InputA25_f16.txt | 152 ++++++ .../DSP/Stats/StatsF16/InputB24_f16.txt | 152 ++++++ .../DSP/Stats/StatsF16/InputB25_f16.txt | 152 ++++++ .../DSP/Stats/StatsF16/RefEntropy22_f16.txt | 22 + .../DSP/Stats/StatsF16/RefKL24_f16.txt | 22 + .../DSP/Stats/StatsF16/RefLogSumExp23_f16.txt | 22 + .../Stats/StatsF16/RefLogSumExpDot25_f16.txt | 22 + Testing/Source/Tests/FastMathF16.cpp | 243 +++++++++ Testing/Source/Tests/StatsTestsF16.cpp | 26 +- Testing/desc_f16.txt | 88 ++- 70 files changed, 4423 insertions(+), 81 deletions(-) create mode 100755 Include/arm_vec_math_f16.h create mode 100755 Source/FastMathFunctions/FastMathFunctionsF16.c create mode 100755 Source/FastMathFunctions/arm_vexp_f16.c create mode 100755 Source/FastMathFunctions/arm_vinverse_f16.c create mode 
100755 Source/FastMathFunctions/arm_vlog_f16.c create mode 100755 Source/StatisticsFunctions/arm_entropy_f16.c create mode 100755 Source/StatisticsFunctions/arm_kullback_leibler_f16.c create mode 100755 Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c create mode 100755 Source/StatisticsFunctions/arm_logsumexp_f16.c create mode 100755 Source/StatisticsFunctions/arm_max_no_idx_f16.c create mode 100755 Testing/Include/Tests/FastMathF16.h create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/Angles1_f16.txt create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/Cos1_f16.txt create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/Exp1_f16.txt create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/ExpInput1_f16.txt create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/Inverse1_f16.txt create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/Log1_f16.txt create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/LogInput1_f16.txt create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/Samples1_f16.txt create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/Sin1_f16.txt create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/Sqrt1_f16.txt create mode 100755 Testing/Patterns/DSP/FastMath/FastMathF16/SqrtInput1_f16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/Dims22_s16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/Dims23_s16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/Dims24_s16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/Dims25_s16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/Input22_f16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/Input23_f16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/InputA24_f16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/InputA25_f16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/InputB24_f16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/InputB25_f16.txt 
create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/RefEntropy22_f16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/RefKL24_f16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExp23_f16.txt create mode 100755 Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExpDot25_f16.txt create mode 100755 Testing/Source/Tests/FastMathF16.cpp diff --git a/Include/arm_common_tables_f16.h b/Include/arm_common_tables_f16.h index 6fd824c1..f6ef321b 100755 --- a/Include/arm_common_tables_f16.h +++ b/Include/arm_common_tables_f16.h @@ -113,6 +113,14 @@ extern "C" #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */ +#if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED) + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) + extern const float16_t exp_tab_f16[8]; + extern const float16_t __logf_lut_f16[8]; +#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */ +#endif + #ifdef __cplusplus } diff --git a/Include/arm_math_types_f16.h b/Include/arm_math_types_f16.h index 6916e45e..92939b14 100755 --- a/Include/arm_math_types_f16.h +++ b/Include/arm_math_types_f16.h @@ -140,7 +140,7 @@ won't be built. #define F16_ABSMAX ((float16_t)FLT_MAX) #define F16_ABSMIN ((float16_t)0.0) - +#define F16INFINITY ((float16_t)__builtin_inf()) /* true +Inf; casting the integer bit pattern 0x7c00 would convert by value and yield 31744.0 */ #endif /* ARM_FLOAT16_SUPPORTED*/ #endif /* !defined( __CC_ARM ) */ diff --git a/Include/arm_vec_math_f16.h b/Include/arm_vec_math_f16.h new file mode 100755 index 00000000..bd292c94 --- /dev/null +++ b/Include/arm_vec_math_f16.h @@ -0,0 +1,232 @@ +/****************************************************************************** + * @file arm_vec_math_f16.h + * @brief Public header file for CMSIS DSP Library + ******************************************************************************/ +/* + * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ARM_VEC_MATH_F16_H +#define _ARM_VEC_MATH_F16_H + +#include "arm_math_types_f16.h" +#include "arm_common_tables_f16.h" +#include "arm_helium_utils.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +#if defined(ARM_FLOAT16_SUPPORTED) + + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) + + +static const float16_t __logf_rng_f16=0.693147180f16; + + +/* fast inverse approximation: exponent-based linear seed refined by 4 Newton-Raphson steps; lanes where x == 0 are set to F16INFINITY and the sign of x is restored last */ +__STATIC_INLINE f16x8_t vrecip_hiprec_f16( + f16x8_t x) +{ + q15x8_t m; + f16x8_t b; + any16x8_t xinv; + f16x8_t ax = vabsq(x); + + xinv.f = ax; + + m = 0x03c00 - (xinv.i & 0x07c00); + xinv.i = xinv.i + m; + xinv.f = 1.41176471f16 - 0.47058824f16 * xinv.f; + xinv.i = xinv.i + m; + + b = 2.0f16 - xinv.f * ax; + xinv.f = xinv.f * b; + + b = 2.0f16 - xinv.f * ax; + xinv.f = xinv.f * b; + + b = 2.0f16 - xinv.f * ax; + xinv.f = xinv.f * b; + + b = 2.0f16 - xinv.f * ax; + xinv.f = xinv.f * b; + + xinv.f = vdupq_m(xinv.f, F16INFINITY, vcmpeqq(x, 0.0f)); + /* + * restore sign + */ + xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f)); + + return xinv.f; +} + +__STATIC_INLINE f16x8_t vdiv_f16( + f16x8_t num, f16x8_t den) +{ + return vmulq(num, vrecip_hiprec_f16(den)); +} + + +/** + @brief Half-precision Taylor polynomial evaluation. 
+ @param[in] x f16 vector input + @param[in] coeffs f16 vector coeffs + @return destination f16 vector + */ + +__STATIC_INLINE float16x8_t vtaylor_polyq_f16( + float16x8_t x, + const float16_t * coeffs) +{ + float16x8_t A = vfmasq(vdupq_n_f16(coeffs[4]), x, coeffs[0]); + float16x8_t B = vfmasq(vdupq_n_f16(coeffs[6]), x, coeffs[2]); + float16x8_t C = vfmasq(vdupq_n_f16(coeffs[5]), x, coeffs[1]); + float16x8_t D = vfmasq(vdupq_n_f16(coeffs[7]), x, coeffs[3]); + float16x8_t x2 = vmulq(x, x); + float16x8_t x4 = vmulq(x2, x2); + float16x8_t res = vfmaq(vfmaq_f16(A, B, x2), vfmaq_f16(C, D, x2), x4); + + return res; +} + +__STATIC_INLINE float16x8_t vmant_exp_f16( + float16x8_t x, + int16x8_t * e) +{ + any16x8_t r; + int16x8_t n; + + r.f = x; + n = r.i >> 10; + n = n - 15; + r.i = r.i - (n << 10); + + *e = n; + return r.f; +} + + +__STATIC_INLINE float16x8_t vlogq_f16(float16x8_t vecIn) +{ + q15x8_t vecExpUnBiased; + float16x8_t vecTmpFlt0, vecTmpFlt1; + float16x8_t vecAcc0, vecAcc1, vecAcc2, vecAcc3; + float16x8_t vecExpUnBiasedFlt; + + /* + * extract exponent + */ + vecTmpFlt1 = vmant_exp_f16(vecIn, &vecExpUnBiased); + + vecTmpFlt0 = vecTmpFlt1 * vecTmpFlt1; + /* + * a = (__logf_lut_f16[4] * r.f) + (__logf_lut_f16[0]); + */ + vecAcc0 = vdupq_n_f16(__logf_lut_f16[0]); + vecAcc0 = vfmaq(vecAcc0, vecTmpFlt1, __logf_lut_f16[4]); + /* + * b = (__logf_lut_f16[6] * r.f) + (__logf_lut_f16[2]); + */ + vecAcc1 = vdupq_n_f16(__logf_lut_f16[2]); + vecAcc1 = vfmaq(vecAcc1, vecTmpFlt1, __logf_lut_f16[6]); + /* + * c = (__logf_lut_f16[5] * r.f) + (__logf_lut_f16[1]); + */ + vecAcc2 = vdupq_n_f16(__logf_lut_f16[1]); + vecAcc2 = vfmaq(vecAcc2, vecTmpFlt1, __logf_lut_f16[5]); + /* + * d = (__logf_lut_f16[7] * r.f) + (__logf_lut_f16[3]); + */ + vecAcc3 = vdupq_n_f16(__logf_lut_f16[3]); + vecAcc3 = vfmaq(vecAcc3, vecTmpFlt1, __logf_lut_f16[7]); + /* + * a = a + b * xx; + */ + vecAcc0 = vfmaq(vecAcc0, vecAcc1, vecTmpFlt0); + /* + * c = c + d * xx; + */ + vecAcc2 = vfmaq(vecAcc2, vecAcc3, 
vecTmpFlt0); + /* + * xx = xx * xx; + */ + vecTmpFlt0 = vecTmpFlt0 * vecTmpFlt0; + vecExpUnBiasedFlt = vcvtq_f16_s16(vecExpUnBiased); + /* + * r.f = a + c * xx; + */ + vecAcc0 = vfmaq(vecAcc0, vecAcc2, vecTmpFlt0); + /* + * add exponent + * r.f = r.f + ((float32_t) m) * __logf_rng_f16; + */ + vecAcc0 = vfmaq(vecAcc0, vecExpUnBiasedFlt, __logf_rng_f16); + // set log0 down to -inf + vecAcc0 = vdupq_m(vecAcc0, -F16INFINITY, vcmpeqq(vecIn, 0.0f)); + return vecAcc0; +} + +__STATIC_INLINE float16x8_t vexpq_f16( + float16x8_t x) +{ + // Perform range reduction [-log(2),log(2)] + int16x8_t m = vcvtq_s16_f16(vmulq_n_f16(x, 1.4426950408f16)); + float16x8_t val = vfmsq_f16(x, vcvtq_f16_s16(m), vdupq_n_f16(0.6931471805f16)); + + // Polynomial Approximation + float16x8_t poly = vtaylor_polyq_f16(val, exp_tab_f16); + + // Reconstruct + poly = (float16x8_t) (vqaddq_s16((int16x8_t) (poly), vqshlq_n_s16(m, 10))); + + poly = vdupq_m(poly, 0.0f, vcmpltq_n_s16(m, -14)); + return poly; +} + +__STATIC_INLINE float16x8_t arm_vec_exponent_f16(float16x8_t x, int16_t nb) +{ + float16x8_t r = x; + nb--; + while (nb > 0) { + r = vmulq(r, x); + nb--; + } + return (r); +} + + + +#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)*/ + + + +#ifdef __cplusplus +} +#endif + +#endif /* ARM FLOAT16 SUPPORTED */ + +#endif /* _ARM_VEC_MATH_H */ + +/** + * + * End of file. 
+ */ diff --git a/Include/dsp/basic_math_functions_f16.h b/Include/dsp/basic_math_functions_f16.h index 3227e626..91109983 100755 --- a/Include/dsp/basic_math_functions_f16.h +++ b/Include/dsp/basic_math_functions_f16.h @@ -36,7 +36,6 @@ extern "C" #include "dsp/none.h" #include "dsp/utils.h" -#include "dsp/fast_math_functions_f16.h" #if defined(ARM_FLOAT16_SUPPORTED) diff --git a/Include/dsp/fast_math_functions_f16.h b/Include/dsp/fast_math_functions_f16.h index cafeac0c..a0815767 100755 --- a/Include/dsp/fast_math_functions_f16.h +++ b/Include/dsp/fast_math_functions_f16.h @@ -31,6 +31,8 @@ #include "dsp/none.h" #include "dsp/utils.h" + +/* For sqrt_f32 */ #include "dsp/fast_math_functions.h" #ifdef __cplusplus @@ -69,6 +71,42 @@ __STATIC_FORCEINLINE arm_status arm_sqrt_f16( @} end of SQRT group */ +/** + @brief Floating-point vector of log values. + @param[in] pSrc points to the input vector + @param[out] pDst points to the output vector + @param[in] blockSize number of samples in each vector + @return none + */ + void arm_vlog_f16( + const float16_t * pSrc, + float16_t * pDst, + uint32_t blockSize); + +/** + @brief Floating-point vector of exp values. + @param[in] pSrc points to the input vector + @param[out] pDst points to the output vector + @param[in] blockSize number of samples in each vector + @return none + */ + void arm_vexp_f16( + const float16_t * pSrc, + float16_t * pDst, + uint32_t blockSize); + + /** + @brief Floating-point vector of inverse values. 
+ @param[in] pSrc points to the input vector + @param[out] pDst points to the output vector + @param[in] blockSize number of samples in each vector + @return none + */ + void arm_vinverse_f16( + const float16_t * pSrc, + float16_t * pDst, + uint32_t blockSize); + #endif /*defined(ARM_FLOAT16_SUPPORTED)*/ #ifdef __cplusplus } diff --git a/Include/dsp/statistics_functions_f16.h b/Include/dsp/statistics_functions_f16.h index c891e3fe..d08e122f 100755 --- a/Include/dsp/statistics_functions_f16.h +++ b/Include/dsp/statistics_functions_f16.h @@ -123,6 +123,65 @@ extern "C" float16_t * pResult, uint32_t * pIndex); +/** + * @brief Entropy + * + * @param[in] pSrcA Array of input values. + * @param[in] blockSize Number of samples in the input array. + * @return Entropy -Sum(p ln p) + * + */ + + +float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize); + +float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize); + +/** + * @brief Dot product with log arithmetic + * + * Vectors are containing the log of the samples + * + * @param[in] pSrcA points to the first input vector + * @param[in] pSrcB points to the second input vector + * @param[in] blockSize number of samples in each vector + * @param[in] pTmpBuffer temporary buffer of length blockSize + * @return The log of the dot product . + * + */ + + +float16_t arm_logsumexp_dot_prod_f16(const float16_t * pSrcA, + const float16_t * pSrcB, + uint32_t blockSize, + float16_t *pTmpBuffer); + +/** + * @brief Kullback-Leibler + * + * @param[in] pSrcA Pointer to an array of input values for probability distribution A. + * @param[in] pSrcB Pointer to an array of input values for probability distribution B. + * @param[in] blockSize Number of samples in the input array. + * @return Kullback-Leibler Divergence D(A || B) + * + */ +float16_t arm_kullback_leibler_f16(const float16_t * pSrcA + ,const float16_t * pSrcB + ,uint32_t blockSize); + +/** + @brief Maximum value of a floating-point vector. 
+ @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ + void arm_max_no_idx_f16( + const float16_t *pSrc, + uint32_t blockSize, + float16_t *pResult); + + #endif /*defined(ARM_FLOAT16_SUPPORTED)*/ #ifdef __cplusplus diff --git a/Source/CommonTables/arm_common_tables_f16.c b/Source/CommonTables/arm_common_tables_f16.c index 4945f5fe..8298c89b 100755 --- a/Source/CommonTables/arm_common_tables_f16.c +++ b/Source/CommonTables/arm_common_tables_f16.c @@ -12550,6 +12550,32 @@ const float16_t twiddleCoefF16_rfft_4096[4096] = { #endif /*!defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)*/ +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) +const float16_t exp_tab_f16[8] = { + (1.f16), + (0.0416598916054f16), + (0.500000596046f16), + (0.00138889f16), + (1.00000011921f16), + (0.00833693705499f16), + (0.166665703058f16), + (0.000195780929062f16), +}; + +const float16_t __logf_lut_f16[8] = { + -2.295614848256274f16, /*p0*/ + -2.470711633419806f16, /*p4*/ + -5.686926051100417f16, /*p2*/ + -0.165253547131978f16, /*p6*/ + +5.175912446351073f16, /*p1*/ + +0.844006986174912f16, /*p5*/ + +4.584458825456749f16, /*p3*/ + +0.014127821926000f16 /*p7*/ +}; + +#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */ + + #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ #endif /* Not ARM AC5 */ diff --git a/Source/FastMathFunctions/CMakeLists.txt b/Source/FastMathFunctions/CMakeLists.txt index c784b52e..50f8109b 100644 --- a/Source/FastMathFunctions/CMakeLists.txt +++ b/Source/FastMathFunctions/CMakeLists.txt @@ -47,6 +47,12 @@ target_sources(CMSISDSPFastMath PRIVATE arm_sqrt_q31.c) target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f32.c) target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f32.c) +if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16)) +target_sources(CMSISDSPFastMath 
PRIVATE arm_vlog_f16.c) +target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f16.c) +target_sources(CMSISDSPFastMath PRIVATE arm_vinverse_f16.c) +endif() + ### Includes target_include_directories(CMSISDSPFastMath PUBLIC "${DSP}/Include") diff --git a/Source/FastMathFunctions/FastMathFunctionsF16.c b/Source/FastMathFunctions/FastMathFunctionsF16.c new file mode 100755 index 00000000..a14e8998 --- /dev/null +++ b/Source/FastMathFunctions/FastMathFunctionsF16.c @@ -0,0 +1,31 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: FastMathFunctions.c + * Description: Combination of all fast math function source files. + * + * $Date: 16. March 2020 + * $Revision: V1.1.0 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arm_vexp_f16.c" +#include "arm_vlog_f16.c" +#include "arm_vinverse_f16.c" diff --git a/Source/FastMathFunctions/arm_vexp_f16.c b/Source/FastMathFunctions/arm_vexp_f16.c new file mode 100755 index 00000000..abfaf741 --- /dev/null +++ b/Source/FastMathFunctions/arm_vexp_f16.c @@ -0,0 +1,84 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vexp_f16.c + * Description: Fast vectorized exp + * + * $Date: 15. October 2020 + * $Revision: V1.6.0 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "dsp/fast_math_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include "arm_common_tables.h" + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include "arm_vec_math_f16.h" +#endif + + +void arm_vexp_f16( + const float16_t * pSrc, + float16_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) + + f16x8_t src; + f16x8_t dst; + + blkCnt = blockSize >> 3; + + while (blkCnt > 0U) + { + src = vld1q(pSrc); + dst = vexpq_f16(src); + vst1q(pDst, dst); + + pSrc += 8; + pDst += 8; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 7; +#else + blkCnt = blockSize; +#endif + + while (blkCnt > 0U) + { + /* C = exp(A) */ + + /* Calculate exp and store result in destination buffer. */ + *pDst++ = expf(*pSrc++); + + /* Decrement loop counter */ + blkCnt--; + } +} + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Source/FastMathFunctions/arm_vinverse_f16.c b/Source/FastMathFunctions/arm_vinverse_f16.c new file mode 100755 index 00000000..8eb6488d --- /dev/null +++ b/Source/FastMathFunctions/arm_vinverse_f16.c @@ -0,0 +1,81 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vinverse_f16.c + * Description: Fast vectorized inverse + * + * $Date: 15. October 2020 + * $Revision: V1.6.0 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include "arm_common_tables.h" + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include "arm_vec_math_f16.h" +#endif + +void arm_vinverse_f16( + const float16_t * pSrc, + float16_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) + + f16x8_t src; + f16x8_t dst; + + blkCnt = blockSize >> 3; + + while (blkCnt > 0U) + { + src = vld1q(pSrc); + dst = vrecip_hiprec_f16(src); + vst1q(pDst, dst); + + pSrc += 8; + pDst += 8; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 7; +#else + blkCnt = blockSize; +#endif + + while (blkCnt > 0U) + { + + *pDst++ = 1.0 / *pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } +} + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Source/FastMathFunctions/arm_vlog_f16.c b/Source/FastMathFunctions/arm_vlog_f16.c new file mode 100755 index 00000000..88aa4c17 --- /dev/null +++ b/Source/FastMathFunctions/arm_vlog_f16.c @@ -0,0 +1,83 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vlog_f16.c + * Description: Fast vectorized log + * + * $Date: 15. 
Octoboer 2020 + * $Revision: V1.6.0 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include "arm_common_tables.h" + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include "arm_vec_math_f16.h" +#endif + +void arm_vlog_f16( + const float16_t * pSrc, + float16_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) + + f16x8_t src; + f16x8_t dst; + + blkCnt = blockSize >> 3; + + while (blkCnt > 0U) + { + src = vld1q(pSrc); + dst = vlogq_f16(src); + vst1q(pDst, dst); + + pSrc += 8; + pDst += 8; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 7; +#else + blkCnt = blockSize; +#endif + + while (blkCnt > 0U) + { + /* C = log(A) */ + + /* Calculate log and store result in destination buffer. 
*/ + *pDst++ = logf(*pSrc++); + + /* Decrement loop counter */ + blkCnt--; + } +} + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Source/SVMFunctions/arm_svm_linear_init_f32.c b/Source/SVMFunctions/arm_svm_linear_init_f32.c index b0739b1c..76171265 100755 --- a/Source/SVMFunctions/arm_svm_linear_init_f32.c +++ b/Source/SVMFunctions/arm_svm_linear_init_f32.c @@ -33,9 +33,18 @@ * */ +/** + @ingroup groupSVM + */ + +/** + @defgroup linearsvm Linear SVM + + Linear SVM classifier + */ /** - * @addtogroup groupSVM + * @addtogroup linearsvm * @{ */ @@ -77,5 +86,5 @@ void arm_svm_linear_init_f32(arm_svm_linear_instance_f32 *S, /** - * @} end of groupSVM group + * @} end of linearsvm group */ diff --git a/Source/SVMFunctions/arm_svm_linear_predict_f32.c b/Source/SVMFunctions/arm_svm_linear_predict_f32.c index 9cba7d1d..c45426ab 100755 --- a/Source/SVMFunctions/arm_svm_linear_predict_f32.c +++ b/Source/SVMFunctions/arm_svm_linear_predict_f32.c @@ -30,7 +30,7 @@ /** - * @addtogroup groupSVM + * @addtogroup linearsvm * @{ */ @@ -449,5 +449,5 @@ void arm_svm_linear_predict_f32( #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ /** - * @} end of groupSVM group + * @} end of linearsvm group */ diff --git a/Source/SVMFunctions/arm_svm_polynomial_init_f32.c b/Source/SVMFunctions/arm_svm_polynomial_init_f32.c index 45e89efb..5d427704 100755 --- a/Source/SVMFunctions/arm_svm_polynomial_init_f32.c +++ b/Source/SVMFunctions/arm_svm_polynomial_init_f32.c @@ -28,12 +28,20 @@ #include #include +/** + @ingroup groupSVM + */ /** - * @addtogroup groupSVM - * @{ + @defgroup polysvm Polynomial SVM + + Polynomial SVM classifier */ +/** + * @addtogroup polysvm + * @{ + */ /** @@ -83,5 +91,5 @@ void arm_svm_polynomial_init_f32(arm_svm_polynomial_instance_f32 *S, /** - * @} end of groupSVM group + * @} end of polysvm group */ diff --git a/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c b/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c index 
4a108ffd..5fca1e58 100755 --- a/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c +++ b/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c @@ -33,7 +33,7 @@ #endif /** - * @addtogroup groupSVM + * @addtogroup polysvm * @{ */ @@ -484,5 +484,5 @@ void arm_svm_polynomial_predict_f32( /** - * @} end of groupSVM group + * @} end of polysvm group */ diff --git a/Source/SVMFunctions/arm_svm_rbf_init_f32.c b/Source/SVMFunctions/arm_svm_rbf_init_f32.c index c00f9eb4..5e0d3712 100755 --- a/Source/SVMFunctions/arm_svm_rbf_init_f32.c +++ b/Source/SVMFunctions/arm_svm_rbf_init_f32.c @@ -28,9 +28,19 @@ #include #include +/** + @ingroup groupSVM + */ + +/** + @defgroup rbfsvm RBF SVM + + RBF SVM classifier + */ + /** - * @addtogroup groupSVM + * @addtogroup rbfsvm * @{ */ @@ -75,5 +85,5 @@ void arm_svm_rbf_init_f32(arm_svm_rbf_instance_f32 *S, /** - * @} end of groupSVM group + * @} end of rbfsvm group */ diff --git a/Source/SVMFunctions/arm_svm_rbf_predict_f32.c b/Source/SVMFunctions/arm_svm_rbf_predict_f32.c index 0907dbbb..d29a2164 100755 --- a/Source/SVMFunctions/arm_svm_rbf_predict_f32.c +++ b/Source/SVMFunctions/arm_svm_rbf_predict_f32.c @@ -30,7 +30,7 @@ /** - * @addtogroup groupSVM + * @addtogroup rbfsvm * @{ */ @@ -517,5 +517,5 @@ void arm_svm_rbf_predict_f32( #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ /** - * @} end of groupSVM group + * @} end of rbfsvm group */ diff --git a/Source/SVMFunctions/arm_svm_sigmoid_init_f32.c b/Source/SVMFunctions/arm_svm_sigmoid_init_f32.c index 407b68f9..2017289f 100755 --- a/Source/SVMFunctions/arm_svm_sigmoid_init_f32.c +++ b/Source/SVMFunctions/arm_svm_sigmoid_init_f32.c @@ -28,9 +28,18 @@ #include #include +/** + @ingroup groupSVM + */ + +/** + @defgroup sigmoidsvm Sigmoid SVM + + Sigmoid SVM classifier + */ /** - * @addtogroup groupSVM + * @addtogroup sigmoidsvm * @{ */ @@ -77,5 +86,5 @@ void arm_svm_sigmoid_init_f32(arm_svm_sigmoid_instance_f32 *S, /** - * @} end of groupSVM group + * @} end of 
sigmoidsvm group */ diff --git a/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c b/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c index f15e236b..83972266 100755 --- a/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c +++ b/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c @@ -29,7 +29,7 @@ #include /** - * @addtogroup groupSVM + * @addtogroup sigmoidsvm * @{ */ @@ -481,5 +481,5 @@ void arm_svm_sigmoid_predict_f32( #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ /** - * @} end of groupSVM group + * @} end of sigmoidsvm group */ diff --git a/Source/StatisticsFunctions/CMakeLists.txt b/Source/StatisticsFunctions/CMakeLists.txt index b4310f30..67833f1f 100644 --- a/Source/StatisticsFunctions/CMakeLists.txt +++ b/Source/StatisticsFunctions/CMakeLists.txt @@ -59,4 +59,9 @@ target_sources(CMSISDSPStatistics PRIVATE arm_power_f16.c) target_sources(CMSISDSPStatistics PRIVATE arm_rms_f16.c) target_sources(CMSISDSPStatistics PRIVATE arm_std_f16.c) target_sources(CMSISDSPStatistics PRIVATE arm_var_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f16.c) endif() \ No newline at end of file diff --git a/Source/StatisticsFunctions/StatisticsFunctionsF16.c b/Source/StatisticsFunctions/StatisticsFunctionsF16.c index b542e976..14ceb1d1 100755 --- a/Source/StatisticsFunctions/StatisticsFunctionsF16.c +++ b/Source/StatisticsFunctions/StatisticsFunctionsF16.c @@ -33,3 +33,8 @@ #include "arm_rms_f16.c" #include "arm_std_f16.c" #include "arm_var_f16.c" +#include "arm_entropy_f16.c" +#include "arm_kullback_leibler_f16.c" +#include "arm_logsumexp_dot_prod_f16.c" +#include "arm_logsumexp_f16.c" +#include "arm_max_no_idx_f16.c" diff --git 
a/Source/StatisticsFunctions/arm_entropy_f16.c b/Source/StatisticsFunctions/arm_entropy_f16.c new file mode 100755 index 00000000..a3501782 --- /dev/null +++ b/Source/StatisticsFunctions/arm_entropy_f16.c @@ -0,0 +1,138 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_entropy_f16.c + * Description: Entropy + * + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include +#include + +/** + @ingroup groupStats + */ + +/** + @defgroup Entropy Entropy + + Computes the entropy of a distribution + + */ + +/** + * @addtogroup Entropy + * @{ + */ + + +/** + * @brief Entropy + * + * @param[in] pSrcA Array of input values. + * @param[in] blockSize Number of samples in the input array. 
+ * @return Entropy -Sum(p ln p) + * + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +#include "arm_vec_math_f16.h" + +float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize) +{ + uint32_t blkCnt; + float16_t accum=0.0f,p; + + + blkCnt = blockSize; + + f16x8_t vSum = vdupq_n_f16(0.0f); + /* Process 8 samples per iteration (one f16x8_t vector) */ + blkCnt = blockSize >> 3U; + + while (blkCnt > 0U) + { + f16x8_t vecIn = vld1q(pSrcA); + + vSum = vaddq_f16(vSum, vmulq(vecIn, vlogq_f16(vecIn))); + + /* + * Decrement the block loop counter + * Advance the vector source pointer + */ + pSrcA += 8; + blkCnt --; + } + + accum = vecAddAcrossF16Mve(vSum); + + /* Tail: the remaining (blockSize & 7) samples, one at a time */ + blkCnt = blockSize & 0x7; + while(blkCnt > 0) + { + p = *pSrcA++; + accum += p * logf(p); + + blkCnt--; + + } + + return (-accum); +} + +#else + +float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize) +{ + const float16_t *pIn; + uint32_t blkCnt; + float16_t accum, p; + + pIn = pSrcA; + blkCnt = blockSize; + + accum = 0.0f; + + while(blkCnt > 0) + { + p = *pIn++; + accum += p * logf(p); + + blkCnt--; + + } + + return(-accum); +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of Entropy group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Source/StatisticsFunctions/arm_entropy_f32.c b/Source/StatisticsFunctions/arm_entropy_f32.c index 60c325dc..ede07ecc 100755 --- a/Source/StatisticsFunctions/arm_entropy_f32.c +++ b/Source/StatisticsFunctions/arm_entropy_f32.c @@ -30,7 +30,7 @@ /** - * @addtogroup groupStats + * @addtogroup Entropy * @{ */ @@ -168,5 +168,5 @@ float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize) #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ /** - * @} end of groupStats group + * @} end of Entropy group */ diff --git a/Source/StatisticsFunctions/arm_entropy_f64.c b/Source/StatisticsFunctions/arm_entropy_f64.c 
index d2a6a495..7a9de50f 100755 --- a/Source/StatisticsFunctions/arm_entropy_f64.c +++ b/Source/StatisticsFunctions/arm_entropy_f64.c @@ -29,7 +29,7 @@ #include /** - * @addtogroup groupStats + * @addtogroup Entropy * @{ */ @@ -67,5 +67,5 @@ float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize) } /** - * @} end of groupStats group + * @} end of Entropy group */ diff --git a/Source/StatisticsFunctions/arm_kullback_leibler_f16.c b/Source/StatisticsFunctions/arm_kullback_leibler_f16.c new file mode 100755 index 00000000..06f4ba65 --- /dev/null +++ b/Source/StatisticsFunctions/arm_kullback_leibler_f16.c @@ -0,0 +1,150 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_kullback_leibler_f16.c + * Description: Kullback-Leibler divergence + * + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include +#include + +/** + @ingroup groupStats + */ + +/** + @defgroup Kullback-Leibler Kullback-Leibler divergence + + Computes the Kullback-Leibler divergence between two distributions + + */ + + +/** + * @addtogroup Kullback-Leibler + * @{ + */ + + +/** + * @brief Kullback-Leibler + * + * Distribution A may contain 0 with Neon version. + * Result will be right but some exception flags will be set. + * + * Distribution B must not contain 0 probability. + * + * @param[in] *pSrcA points to an array of input values for probability distribution A. + * @param[in] *pSrcB points to an array of input values for probability distribution B. + * @param[in] blockSize number of samples in the input array. + * @return Kullback-Leibler divergence D(A || B) + * + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +#include "arm_vec_math_f16.h" + +float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize) +{ + uint32_t blkCnt; + float16_t accum, pA,pB; + + + blkCnt = blockSize; + + accum = 0.0f; + + f16x8_t vSum = vdupq_n_f16(0.0f); + blkCnt = blockSize >> 3; + while(blkCnt > 0) + { + f16x8_t vecA = vld1q(pSrcA); + f16x8_t vecB = vld1q(pSrcB); + f16x8_t vRatio; + + vRatio = vdiv_f16(vecB, vecA); + vSum = vaddq_f16(vSum, vmulq(vecA, vlogq_f16(vRatio))); + + /* + * Decrement the block loop counter + * Advance both vector source pointers + */ + pSrcA += 8; + pSrcB += 8; + blkCnt --; + } + + accum = vecAddAcrossF16Mve(vSum); + + blkCnt = blockSize & 7; + while(blkCnt > 0) + { + pA = *pSrcA++; + pB = *pSrcB++; + accum += pA * logf(pB / pA); + + blkCnt--; + + } + + return(-accum); +} + +#else +float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize) +{ + const float16_t *pInA, *pInB; + uint32_t blkCnt; + float16_t accum, pA,pB; + + pInA = 
pSrcA; + pInB = pSrcB; + blkCnt = blockSize; + + accum = 0.0f; + + while(blkCnt > 0) + { + pA = *pInA++; + pB = *pInB++; + accum += pA * logf(pB / pA); + + blkCnt--; + + } + + return(-accum); +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of Kullback-Leibler group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Source/StatisticsFunctions/arm_kullback_leibler_f32.c b/Source/StatisticsFunctions/arm_kullback_leibler_f32.c index b6647579..e7817cbb 100755 --- a/Source/StatisticsFunctions/arm_kullback_leibler_f32.c +++ b/Source/StatisticsFunctions/arm_kullback_leibler_f32.c @@ -30,7 +30,7 @@ /** - * @addtogroup groupStats + * @addtogroup Kullback-Leibler * @{ */ @@ -187,5 +187,5 @@ float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSr #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ /** - * @} end of groupStats group + * @} end of Kullback-Leibler group */ diff --git a/Source/StatisticsFunctions/arm_kullback_leibler_f64.c b/Source/StatisticsFunctions/arm_kullback_leibler_f64.c index ded2c6c3..e56dc828 100755 --- a/Source/StatisticsFunctions/arm_kullback_leibler_f64.c +++ b/Source/StatisticsFunctions/arm_kullback_leibler_f64.c @@ -29,7 +29,7 @@ #include /** - * @addtogroup groupStats + * @addtogroup Kullback-Leibler * @{ */ @@ -69,5 +69,5 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA, const float64_t * pS } /** - * @} end of groupStats group + * @} end of Kullback-Leibler group */ diff --git a/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c b/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c new file mode 100755 index 00000000..b76d17bd --- /dev/null +++ b/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c @@ -0,0 +1,82 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_dot_prod_f16.c + * Description: Dot product with log arithmetic + * + * + * Target Processor: 
Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include +#include + +/** + @ingroup groupStats + */ + +/** + @defgroup LogSumExp LogSumExp + + LogSumExp optimizations to compute sum of probabilities with Gaussian distributions + + */ + +/** + * @addtogroup LogSumExp + * @{ + */ + + +/** + * @brief Dot product with log arithmetic + * + * Vectors are containing the log of the samples + * + * @param[in] *pSrcA points to the first input vector + * @param[in] *pSrcB points to the second input vector + * @param[in] blockSize number of samples in each vector + * @param[in] *pTmpBuffer temporary buffer of length blockSize + * @return The log of the dot product. 
+ * + */ + + +float16_t arm_logsumexp_dot_prod_f16(const float16_t * pSrcA, + const float16_t * pSrcB, + uint32_t blockSize, + float16_t *pTmpBuffer) +{ + float16_t result; + arm_add_f16((float16_t*)pSrcA, (float16_t*)pSrcB, pTmpBuffer, blockSize); + + result = arm_logsumexp_f16(pTmpBuffer, blockSize); + return(result); +} + +/** + * @} end of LogSumExp group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c b/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c index 12ff09b7..a473ef40 100755 --- a/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c +++ b/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c @@ -30,7 +30,7 @@ /** - * @addtogroup groupStats + * @addtogroup LogSumExp * @{ */ @@ -62,5 +62,5 @@ float32_t arm_logsumexp_dot_prod_f32(const float32_t * pSrcA, } /** - * @} end of groupStats group + * @} end of LogSumExp group */ diff --git a/Source/StatisticsFunctions/arm_logsumexp_f16.c b/Source/StatisticsFunctions/arm_logsumexp_f16.c new file mode 100755 index 00000000..c851bf7e --- /dev/null +++ b/Source/StatisticsFunctions/arm_logsumexp_f16.c @@ -0,0 +1,170 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f16.c + * Description: LogSumExp + * + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include +#include + + +/** + * @addtogroup LogSumExp + * @{ + */ + + +/** + * @brief Computation of the LogSumExp + * + * In probabilistic computations, the dynamic of the probability values can be very + * wide because they come from gaussian functions. + * To avoid underflow and overflow issues, the values are represented by their log. + * In this representation, multiplying the original exp values is easy : their logs are added. + * But adding the original exp values is requiring some special handling and it is the + * goal of the LogSumExp function. + * + * If the values are x1...xn, the function is computing: + * + * ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that + * rounding issues are minimised. + * + * The max xm of the values is extracted and the function is computing: + * xm + ln(exp(x1 - xm) + ... + exp(xn - xm)) + * + * @param[in] *in Pointer to an array of input values. + * @param[in] blockSize Number of samples in the input array. 
+ * @return LogSumExp + * + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +#include "arm_vec_math_f16.h" + +float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize) +{ + float16_t maxVal; + const float16_t *pIn; + int32_t blkCnt; + float16_t accum=0.0f16; + float16_t tmp; + + + arm_max_no_idx_f16((float16_t *) in, blockSize, &maxVal); + + + blkCnt = blockSize; + pIn = in; + + + f16x8_t vSum = vdupq_n_f16(0.0f16); + blkCnt = blockSize >> 3; + while(blkCnt > 0) + { + f16x8_t vecIn = vld1q(pIn); + f16x8_t vecExp; + + vecExp = vexpq_f16(vsubq_n_f16(vecIn, maxVal)); + + vSum = vaddq_f16(vSum, vecExp); + + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pIn += 8; + blkCnt --; + } + + /* sum + log */ + accum = vecAddAcrossF16Mve(vSum); + + blkCnt = blockSize & 0x7; + while(blkCnt > 0) + { + tmp = *pIn++; + accum += expf(tmp - maxVal); + blkCnt--; + + } + + accum = maxVal + logf(accum); + + return (accum); +} + +#else +float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize) +{ + float16_t maxVal; + float16_t tmp; + const float16_t *pIn; + uint32_t blkCnt; + float16_t accum; + + pIn = in; + blkCnt = blockSize; + + maxVal = *pIn++; + blkCnt--; + + while(blkCnt > 0) + { + tmp = *pIn++; + + if (tmp > maxVal) + { + maxVal = tmp; + } + blkCnt--; + + } + + blkCnt = blockSize; + pIn = in; + accum = 0; + while(blkCnt > 0) + { + tmp = *pIn++; + accum += expf(tmp - maxVal); + blkCnt--; + + } + accum = maxVal + logf(accum); + + return(accum); +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of LogSumExp group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Source/StatisticsFunctions/arm_logsumexp_f32.c b/Source/StatisticsFunctions/arm_logsumexp_f32.c index c2e1926d..2233e042 100755 --- a/Source/StatisticsFunctions/arm_logsumexp_f32.c +++ b/Source/StatisticsFunctions/arm_logsumexp_f32.c 
@@ -30,7 +30,7 @@ /** - * @addtogroup groupStats + * @addtogroup LogSumExp * @{ */ @@ -271,5 +271,5 @@ float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize) #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ /** - * @} end of groupStats group + * @} end of LogSumExp group */ diff --git a/Source/StatisticsFunctions/arm_max_no_idx_f16.c b/Source/StatisticsFunctions/arm_max_no_idx_f16.c new file mode 100755 index 00000000..88a6beec --- /dev/null +++ b/Source/StatisticsFunctions/arm_max_no_idx_f16.c @@ -0,0 +1,144 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_no_idx_f16.c + * Description: Maximum value of a floating-point vector without returning the index + * + * $Date: 16. October 2020 + * $Revision: V1.6.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include +#endif + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a floating-point vector. 
+ @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_max_no_idx_f16( + const float16_t *pSrc, + uint32_t blockSize, + float16_t *pResult) +{ + f16x8_t vecSrc; + f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN); + float16_t maxValue = F16_MIN; + float16_t newVal; + uint32_t blkCnt; + + /* Vector loop: process 8 samples per iteration (one f16x8_t vector) */ + blkCnt = blockSize >> 3U; + + while (blkCnt > 0U) + { + + vecSrc = vldrhq_f16(pSrc); + /* + * update per-lane max. + */ + curExtremValVec = vmaxnmq(vecSrc, curExtremValVec); + /* + * Decrement the block loop counter + * Advance the vector source pointer + */ + pSrc += 8; + blkCnt --; + } + /* + * Get max value across the vector + */ + maxValue = vmaxnmvq(maxValue, curExtremValVec); + + blkCnt = blockSize & 7; + + while (blkCnt > 0U) + { + newVal = *pSrc++; + + /* compare for the maximum value */ + if (maxValue < newVal) + { + /* Update the maximum value (no index is tracked) */ + maxValue = newVal; + } + + blkCnt --; + } + + *pResult = maxValue; +} + +#else + +void arm_max_no_idx_f16( + const float16_t *pSrc, + uint32_t blockSize, + float16_t *pResult) +{ + float16_t maxValue = F16_MIN; + float16_t newVal; + + while (blockSize > 0U) + { + newVal = *pSrc++; + + /* compare for the maximum value */ + if (maxValue < newVal) + { + /* Update the maximum value (no index is tracked) */ + maxValue = newVal; + } + + blockSize --; + } + + *pResult = maxValue; +} + +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of Max group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Source/StatisticsFunctions/arm_var_f16.c b/Source/StatisticsFunctions/arm_var_f16.c index 727a5944..af35628d 100755 --- a/Source/StatisticsFunctions/arm_var_f16.c +++ b/Source/StatisticsFunctions/arm_var_f16.c @@ -74,14 
+74,12 @@ void arm_var_f16( arm_mean_f16(pSrc, blockSize, &fMean); /* 6.14 bug */ -#if defined(SDCOMP_xxx) -#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) +#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001) __asm volatile( " vmov.i32 %[acc], #0 \n" : [acc] "+t"(sumVec) : : ); -#endif #endif blkCnt = blockSize; diff --git a/Source/SupportFunctions/arm_barycenter_f16.c b/Source/SupportFunctions/arm_barycenter_f16.c index 18ae0f43..14bcc864 100755 --- a/Source/SupportFunctions/arm_barycenter_f16.c +++ b/Source/SupportFunctions/arm_barycenter_f16.c @@ -31,11 +31,21 @@ #include #include - /** @ingroup groupSupport */ +/** + @defgroup barycenter Barycenter + + Barycenter of weighted vectors + */ + +/** + @addtogroup barycenter + @{ + */ + /** * @brief Barycenter @@ -255,7 +265,7 @@ void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ /** - * @} end of groupSupport group + * @} end of barycenter group */ #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ diff --git a/Source/SupportFunctions/arm_barycenter_f32.c b/Source/SupportFunctions/arm_barycenter_f32.c index e8040931..a08fab18 100755 --- a/Source/SupportFunctions/arm_barycenter_f32.c +++ b/Source/SupportFunctions/arm_barycenter_f32.c @@ -30,7 +30,7 @@ /** - @ingroup groupSupport + @ingroup barycenter */ @@ -408,5 +408,5 @@ void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ /** - * @} end of groupSupport group + * @} end of barycenter group */ diff --git a/Source/SupportFunctions/arm_weighted_sum_f16.c b/Source/SupportFunctions/arm_weighted_sum_f16.c index 1f3df946..1d2c6efa 100755 --- a/Source/SupportFunctions/arm_weighted_sum_f16.c +++ b/Source/SupportFunctions/arm_weighted_sum_f16.c @@ -32,9 +32,19 @@ #if defined(ARM_FLOAT16_SUPPORTED) +/** + @ingroup groupSupport 
+ */ + +/** + @defgroup weightedsum Weighted Sum + + Weighted sum of values + */ + /** - * @addtogroup groupSupport + * @addtogroup weightedsum * @{ */ @@ -128,7 +138,7 @@ float16_t arm_weighted_sum_f16(const float16_t *in, const float16_t *weigths, ui #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ /** - * @} end of groupSupport group + * @} end of weightedsum group */ #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ diff --git a/Source/SupportFunctions/arm_weighted_sum_f32.c b/Source/SupportFunctions/arm_weighted_sum_f32.c index 9bbfe1d4..fd8d5358 100755 --- a/Source/SupportFunctions/arm_weighted_sum_f32.c +++ b/Source/SupportFunctions/arm_weighted_sum_f32.c @@ -31,7 +31,7 @@ #include "dsp/support_functions.h" /** - * @addtogroup groupSupport + * @addtogroup weightedsum * @{ */ @@ -182,5 +182,5 @@ float32_t arm_weighted_sum_f32(const float32_t *in, const float32_t *weigths, ui #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ /** - * @} end of groupSupport group + * @} end of weightedsum group */ diff --git a/Testing/CMakeLists.txt b/Testing/CMakeLists.txt index ebc02ecc..e6f46703 100644 --- a/Testing/CMakeLists.txt +++ b/Testing/CMakeLists.txt @@ -339,6 +339,7 @@ set(TESTSRC16 Source/Tests/TransformRF16.cpp Source/Tests/SupportTestsF16.cpp Source/Tests/SupportBarTestsF16.cpp + Source/Tests/FastMathF16.cpp ) endif() endif() diff --git a/Testing/Include/Tests/FastMathF16.h b/Testing/Include/Tests/FastMathF16.h new file mode 100755 index 00000000..d0fcf844 --- /dev/null +++ b/Testing/Include/Tests/FastMathF16.h @@ -0,0 +1,23 @@ +#include "Test.h" +#include "Pattern.h" + +#include "dsp/fast_math_functions_f16.h" + +class FastMathF16:public Client::Suite + { + public: + FastMathF16(Testing::testID_t id); + virtual void setUp(Testing::testID_t,std::vector& paramsArgs,Client::PatternMgr *mgr); + virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr); + private: + #include "FastMathF16_decl.h" + + Client::Pattern 
input; + + Client::LocalPattern output; + + // Reference patterns are not loaded when we are in dump mode + Client::RefPattern ref; + + + }; diff --git a/Testing/PatternGeneration/FastMath.py b/Testing/PatternGeneration/FastMath.py index e0b1cb5d..6dc9b132 100755 --- a/Testing/PatternGeneration/FastMath.py +++ b/Testing/PatternGeneration/FastMath.py @@ -28,7 +28,7 @@ def writeTests(config,format): vals[0] = -0.4 sqrtvals[0] = 0.0 - if format != 0: + if format != 0 and format != 16: angles=np.concatenate((a1,a2,a1)) angles = angles / (2*math.pi) config.writeInput(1, angles,"Angles") @@ -43,7 +43,7 @@ def writeTests(config,format): config.writeInput(1, samples,"Samples") -def writeTestsF32(config,format): +def writeTestsFloat(config,format): writeTests(config,format) data1 = np.random.randn(20) @@ -61,22 +61,30 @@ def writeTestsF32(config,format): v = np.exp(samples) config.writeReference(1, v,"Exp") - # For benchmarks + # For benchmarks and other tests samples=np.random.randn(NBSAMPLES) samples = np.abs(Tools.normalize(samples)) config.writeInput(1, samples,"Samples") + v = 1.0 / samples + config.writeReference(1, v,"Inverse") + + + + def generatePatterns(): PATTERNDIR = os.path.join("Patterns","DSP","FastMath","FastMath") PARAMDIR = os.path.join("Parameters","DSP","FastMath","FastMath") configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32") + configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16") configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31") configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15") - writeTestsF32(configf32,0) + writeTestsFloat(configf32,0) + writeTestsFloat(configf16,16) writeTests(configq31,31) writeTests(configq15,15) diff --git a/Testing/PatternGeneration/Stats.py b/Testing/PatternGeneration/Stats.py index e0610eb5..9ff92ad9 100755 --- a/Testing/PatternGeneration/Stats.py +++ b/Testing/PatternGeneration/Stats.py @@ -116,6 +116,13 @@ def logSumExpDotTest(config,nb): config.writeInputS16(nb, dims,"Dims") config.writeReference(nb, outputs,"RefLogSumExpDot") 
+def writeF16OnlyTests(config,nb): + entropyTest(config,nb) + logsumexpTest(config,nb+1) + klTest(config,nb+2) + logSumExpDotTest(config,nb+3) + return(nb+4) + def writeF32OnlyTests(config,nb): entropyTest(config,nb) logsumexpTest(config,nb+1) @@ -357,6 +364,7 @@ def generatePatterns(): writeTests(configq7,1,7) nb=writeTests(configf16,1,16) + nb=writeF16OnlyTests(configf16,22) if __name__ == '__main__': generatePatterns() diff --git a/Testing/Patterns/DSP/FastMath/FastMathF16/Angles1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/Angles1_f16.txt new file mode 100755 index 00000000..23d60b64 --- /dev/null +++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Angles1_f16.txt @@ -0,0 +1,48 @@ +H +23 +// 0.000000 +0x0 +// 0.785398 +0x3a48 +// 1.570796 +0x3e48 +// 2.356194 +0x40b6 +// 3.141593 +0x4248 +// 3.926991 +0x43db +// 4.712389 +0x44b6 +// 6.283184 +0x4648 +// -0.785398 +0xba48 +// -1.570796 +0xbe48 +// -2.356194 +0xc0b6 +// -3.141593 +0xc248 +// -3.926991 +0xc3db +// -4.712389 +0xc4b6 +// -6.283186 +0xc648 +// 6.283185 +0x4648 +// 7.068583 +0x4712 +// 7.853982 +0x47db +// 8.639380 +0x4852 +// 9.424778 +0x48b6 +// 10.210176 +0x491b +// 10.995574 +0x497f +// 12.566370 +0x4a48 diff --git a/Testing/Patterns/DSP/FastMath/FastMathF16/Cos1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/Cos1_f16.txt new file mode 100755 index 00000000..1162760e --- /dev/null +++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Cos1_f16.txt @@ -0,0 +1,48 @@ +H +23 +// 1.000000 +0x3c00 +// 0.707107 +0x39a8 +// 0.000000 +0x0 +// -0.707107 +0xb9a8 +// -1.000000 +0xbc00 +// -0.707107 +0xb9a8 +// -0.000000 +0x8000 +// 1.000000 +0x3c00 +// 0.707107 +0x39a8 +// 0.000000 +0x0 +// -0.707107 +0xb9a8 +// -1.000000 +0xbc00 +// -0.707107 +0xb9a8 +// -0.000000 +0x8000 +// 1.000000 +0x3c00 +// 1.000000 +0x3c00 +// 0.707107 +0x39a8 +// 0.000000 +0x0 +// -0.707107 +0xb9a8 +// -1.000000 +0xbc00 +// -0.707107 +0xb9a8 +// -0.000000 +0x8000 +// 1.000000 +0x3c00 diff --git 
a/Testing/Patterns/DSP/FastMath/FastMathF16/Exp1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/Exp1_f16.txt new file mode 100755 index 00000000..ee6530f5 --- /dev/null +++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Exp1_f16.txt @@ -0,0 +1,106 @@ +H +52 +// 1.000000 +0x3c00 +// 2.718282 +0x4170 +// 0.670320 +0x395d +// 0.681354 +0x3973 +// 0.692569 +0x398a +// 0.703969 +0x39a2 +// 0.715557 +0x39b9 +// 0.727336 +0x39d2 +// 0.739308 +0x39ea +// 0.751477 +0x3a03 +// 0.763847 +0x3a1c +// 0.776420 +0x3a36 +// 0.789201 +0x3a50 +// 0.802191 +0x3a6b +// 0.815396 +0x3a86 +// 0.828818 +0x3aa1 +// 0.842460 +0x3abd +// 0.856328 +0x3ada +// 0.870423 +0x3af7 +// 0.884751 +0x3b14 +// 0.899315 +0x3b32 +// 0.914118 +0x3b50 +// 0.929165 +0x3b6f +// 0.944459 +0x3b8e +// 0.960005 +0x3bae +// 0.975808 +0x3bce +// 0.991870 +0x3bef +// 1.008197 +0x3c08 +// 1.024792 +0x3c19 +// 1.041661 +0x3c2b +// 1.058807 +0x3c3c +// 1.076236 +0x3c4e +// 1.093951 +0x3c60 +// 1.111958 +0x3c73 +// 1.130261 +0x3c85 +// 1.148866 +0x3c98 +// 1.167777 +0x3cac +// 1.186999 +0x3cbf +// 1.206538 +0x3cd3 +// 1.226398 +0x3ce8 +// 1.246585 +0x3cfd +// 1.267105 +0x3d12 +// 1.287962 +0x3d27 +// 1.309163 +0x3d3d +// 1.330712 +0x3d53 +// 1.352616 +0x3d69 +// 1.374881 +0x3d80 +// 1.397513 +0x3d97 +// 1.420516 +0x3daf +// 1.443899 +0x3dc7 +// 1.467666 +0x3ddf +// 1.491825 +0x3df8 diff --git a/Testing/Patterns/DSP/FastMath/FastMathF16/ExpInput1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/ExpInput1_f16.txt new file mode 100755 index 00000000..97b0292e --- /dev/null +++ b/Testing/Patterns/DSP/FastMath/FastMathF16/ExpInput1_f16.txt @@ -0,0 +1,106 @@ +H +52 +// 0.000000 +0x0 +// 1.000000 +0x3c00 +// -0.400000 +0xb666 +// -0.383673 +0xb624 +// -0.367347 +0xb5e1 +// -0.351020 +0xb59e +// -0.334694 +0xb55b +// -0.318367 +0xb518 +// -0.302041 +0xb4d5 +// -0.285714 +0xb492 +// -0.269388 +0xb44f +// -0.253061 +0xb40d +// -0.236735 +0xb393 +// -0.220408 +0xb30e +// -0.204082 +0xb288 +// -0.187755 +0xb202 +// -0.171429 
+0xb17c +// -0.155102 +0xb0f7 +// -0.138776 +0xb071 +// -0.122449 +0xafd6 +// -0.106122 +0xaecb +// -0.089796 +0xadbf +// -0.073469 +0xacb4 +// -0.057143 +0xab50 +// -0.040816 +0xa939 +// -0.024490 +0xa645 +// -0.008163 +0xa02e +// 0.008163 +0x202e +// 0.024490 +0x2645 +// 0.040816 +0x2939 +// 0.057143 +0x2b50 +// 0.073469 +0x2cb4 +// 0.089796 +0x2dbf +// 0.106122 +0x2ecb +// 0.122449 +0x2fd6 +// 0.138776 +0x3071 +// 0.155102 +0x30f7 +// 0.171429 +0x317c +// 0.187755 +0x3202 +// 0.204082 +0x3288 +// 0.220408 +0x330e +// 0.236735 +0x3393 +// 0.253061 +0x340d +// 0.269388 +0x344f +// 0.285714 +0x3492 +// 0.302041 +0x34d5 +// 0.318367 +0x3518 +// 0.334694 +0x355b +// 0.351020 +0x359e +// 0.367347 +0x35e1 +// 0.383673 +0x3624 +// 0.400000 +0x3666 diff --git a/Testing/Patterns/DSP/FastMath/FastMathF16/Inverse1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/Inverse1_f16.txt new file mode 100755 index 00000000..feaf72c5 --- /dev/null +++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Inverse1_f16.txt @@ -0,0 +1,514 @@ +H +256 +// 13.282788 +0x4aa4 +// 74.795943 +0x54ad +// 4.534229 +0x4489 +// 3.745299 +0x437e +// 2.439216 +0x40e1 +// 6.181096 +0x462e +// 4.004707 +0x4401 +// 11.283654 +0x49a4 +// 1.847747 +0x3f64 +// 5.549382 +0x458d +// 1.536148 +0x3e25 +// 30.005060 +0x4f80 +// 31.475099 +0x4fde +// 17.543195 +0x4c63 +// 2.419261 +0x40d7 +// 4.349753 +0x445a +// 4.841152 +0x44d7 +// 8.101053 +0x480d +// 7.775466 +0x47c7 +// 10.960573 +0x497b +// 1.935619 +0x3fbe +// 14.846755 +0x4b6c +// 2.538388 +0x4114 +// 2.328174 +0x40a8 +// 1.187660 +0x3cc0 +// 9.459035 +0x48bb +// 6.531679 +0x4688 +// 28.111782 +0x4f07 +// 2.775787 +0x418d +// 1.610728 +0x3e71 +// 4.295367 +0x444c +// 8.503271 +0x4840 +// 6.431921 +0x466f +// 1.644415 +0x3e94 +// 5.022253 +0x4506 +// 46.675156 +0x51d6 +// 7.923678 +0x47ec +// 1.986785 +0x3ff2 +// 12.264212 +0x4a22 +// 5.127496 +0x4521 +// 3.526537 +0x430e +// 3.099069 +0x4233 +// 11.281743 +0x49a4 +// 5.690813 +0x45b1 +// 25.784472 +0x4e72 +// 
31.172867 +0x4fcb +// 5.406237 +0x4568 +// 1.003867 +0x3c04 +// 2.474874 +0x40f3 +// 5.259653 +0x4542 +// 6.833799 +0x46d5 +// 2.146320 +0x404b +// 4.075154 +0x4413 +// 6.755841 +0x46c1 +// 3.298992 +0x4299 +// 3.211582 +0x426c +// 154.452815 +0x58d4 +// 2.467569 +0x40ef +// 13.353500 +0x4aad +// 2.897118 +0x41cb +// 11.198146 +0x4999 +// 2.095061 +0x4031 +// 5.949994 +0x45f3 +// 502.264324 +0x5fd9 +// 9.785310 +0x48e5 +// 24.679848 +0x4e2c +// 9.141598 +0x4892 +// 4.842086 +0x44d8 +// 2.305929 +0x409d +// 2.553810 +0x411c +// 6.529844 +0x4688 +// 12.616308 +0x4a4f +// 10.160835 +0x4915 +// 4.741947 +0x44be +// 3.062033 +0x4220 +// 15.072163 +0x4b89 +// 19.437242 +0x4cdc +// 304.465872 +0x5cc2 +// 3.697883 +0x4365 +// 1.200278 +0x3ccd +// 12.039526 +0x4a05 +// 6.285477 +0x4649 +// 41.940922 +0x513e +// 14.367260 +0x4b2f +// 2.844286 +0x41b0 +// 2.434156 +0x40de +// 18.803330 +0x4cb3 +// 3.590405 +0x432e +// 45.916673 +0x51bd +// 16.744938 +0x4c30 +// 9.513594 +0x48c2 +// 266.925697 +0x5c2c +// 141.996772 +0x5870 +// 1.273840 +0x3d18 +// 4.064921 +0x4411 +// 3.059166 +0x421e +// 3.063974 +0x4221 +// 12.270595 +0x4a23 +// 17.040917 +0x4c43 +// 1.259633 +0x3d0a +// 2.821234 +0x41a4 +// 6.853772 +0x46db +// 7.454620 +0x4774 +// 1.609663 +0x3e70 +// 11.592823 +0x49cc +// 8.194720 +0x4819 +// 2.951149 +0x41e7 +// 2.312031 +0x40a0 +// 9.662832 +0x48d5 +// 1.513853 +0x3e0e +// 93.930231 +0x55df +// 91.754898 +0x55bc +// 2.936342 +0x41df +// 36.983413 +0x509f +// 1.683027 +0x3ebb +// 3.835847 +0x43ac +// 4.018890 +0x4405 +// 8.686859 +0x4858 +// 3.832640 +0x43aa +// 5.651256 +0x45a7 +// 167.057056 +0x5938 +// 10.621388 +0x4950 +// 2.039201 +0x4014 +// 3.169404 +0x4257 +// 121.699150 +0x579b +// 1.962375 +0x3fd9 +// 3.588653 +0x432d +// 6.551802 +0x468d +// 2.273146 +0x408c +// 6.206085 +0x4635 +// 1.260120 +0x3d0a +// 1.328843 +0x3d51 +// 7.193014 +0x4731 +// 5.073456 +0x4513 +// 4.099381 +0x4419 +// 14.640782 +0x4b52 +// 8.470057 +0x483c +// 35.841993 +0x507b +// 25.902541 
+0x4e7a +// 8.278306 +0x4824 +// 6.579874 +0x4694 +// 3.183432 +0x425e +// 3.157985 +0x4251 +// 30.987647 +0x4fbf +// 4.960553 +0x44f6 +// 28.319462 +0x4f14 +// 3.360753 +0x42b9 +// 13.357196 +0x4aae +// 7.617099 +0x479e +// 12.543129 +0x4a46 +// 3.206430 +0x426a +// 24.728589 +0x4e2f +// 2.883364 +0x41c4 +// 4.739018 +0x44bd +// 11.599710 +0x49cd +// 9.538527 +0x48c5 +// 16.661500 +0x4c2a +// 2.736285 +0x4179 +// 6.631133 +0x46a2 +// 11.402722 +0x49b4 +// 1.858302 +0x3f6f +// 11.107479 +0x498e +// 2.144436 +0x404a +// 3.471115 +0x42f1 +// 7.195419 +0x4732 +// 2.250048 +0x4080 +// 5.355021 +0x455b +// 2.175851 +0x405a +// 6.975780 +0x46fa +// 10.830583 +0x496a +// 4.039975 +0x440a +// 19.297319 +0x4cd3 +// 14.568924 +0x4b49 +// 1.760573 +0x3f0b +// 4.344142 +0x4458 +// 1.911989 +0x3fa6 +// 125.811431 +0x57dd +// 6.147110 +0x4626 +// 3.323063 +0x42a5 +// 141.465322 +0x586c +// 2.428645 +0x40db +// 5.057885 +0x450f +// 17.465511 +0x4c5e +// 5.487886 +0x457d +// 3.693320 +0x4363 +// 5.722713 +0x45b9 +// 2.815819 +0x41a2 +// 17.741187 +0x4c6f +// 156.719386 +0x58e6 +// 9.622230 +0x48d0 +// 3.582729 +0x432a +// 7.122762 +0x471f +// 3.801118 +0x439a +// 21.059681 +0x4d44 +// 7.002552 +0x4701 +// 1.681254 +0x3eba +// 26.237564 +0x4e8f +// 2.196333 +0x4065 +// 3.057311 +0x421d +// 43.974325 +0x517f +// 4.871455 +0x44df +// 58.682025 +0x5356 +// 1.935719 +0x3fbe +// 50.178529 +0x5246 +// 14.081570 +0x4b0a +// 2.276558 +0x408e +// 15.295183 +0x4ba6 +// 3.718388 +0x4370 +// 3.839651 +0x43ae +// 3.527233 +0x430e +// 2.428714 +0x40dc +// 11.062054 +0x4988 +// 5.778279 +0x45c7 +// 2.901877 +0x41ce +// 4.614082 +0x449d +// 2.846533 +0x41b1 +// 8.471893 +0x483c +// 5.528337 +0x4587 +// 4.069745 +0x4412 +// 4.311615 +0x4450 +// 2.410911 +0x40d2 +// 4.794024 +0x44cb +// 1.952501 +0x3fcf +// 2.350841 +0x40b4 +// 2.240902 +0x407b +// 5.954644 +0x45f4 +// 6.121318 +0x461f +// 6.649823 +0x46a6 +// 16.968851 +0x4c3e +// 1.000000 +0x3c00 +// 5.024657 +0x4506 +// 5.569085 +0x4592 +// 
8.212669 +0x481b +// 2.487941 +0x40fa +// 1.966557 +0x3fde +// 3.370575 +0x42be +// 29.614104 +0x4f67 +// 3.196824 +0x4265 +// 1.470828 +0x3de2 +// 8.924856 +0x4876 +// 3.005112 +0x4203 +// 3.024847 +0x420d +// 4.340886 +0x4457 +// 1.894158 +0x3f94 +// 3.562064 +0x4320 +// 3.233189 +0x4277 +// 1.628957 +0x3e84 +// 2.791436 +0x4195 +// 8.235823 +0x481e +// 8.808764 +0x4868 +// 36.005208 +0x5080 +// 12.766707 +0x4a62 +// 5.964918 +0x45f7 +// 1.671910 +0x3eb0 +// 2.699011 +0x4166 +// 16.411460 +0x4c1a diff --git a/Testing/Patterns/DSP/FastMath/FastMathF16/Log1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/Log1_f16.txt new file mode 100755 index 00000000..d7ded174 --- /dev/null +++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Log1_f16.txt @@ -0,0 +1,52 @@ +H +25 +// -2.302585 +0xc09b +// -1.203973 +0xbcd1 +// -0.693147 +0xb98c +// 0.000000 +0x0 +// 0.693147 +0x398c +// -2.516839 +0xc109 +// 0.000000 +0x0 +// -5.908962 +0xc5e9 +// -1.345933 +0xbd62 +// -0.923815 +0xbb64 +// -4.046497 +0xc40c +// -2.530234 +0xc10f +// -0.724334 +0xb9cb +// -1.436949 +0xbdbf +// -1.327187 +0xbd4f +// -1.741553 +0xbef7 +// -0.066722 +0xac45 +// -0.616041 +0xb8ee +// -0.822195 +0xba94 +// -1.579204 +0xbe51 +// -1.333689 +0xbd56 +// -0.860545 +0xbae2 +// -1.080309 +0xbc52 +// -1.977120 +0xbfe9 +// -1.877663 +0xbf83 diff --git a/Testing/Patterns/DSP/FastMath/FastMathF16/LogInput1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/LogInput1_f16.txt new file mode 100755 index 00000000..3fde3aab --- /dev/null +++ b/Testing/Patterns/DSP/FastMath/FastMathF16/LogInput1_f16.txt @@ -0,0 +1,52 @@ +H +25 +// 0.100000 +0x2e66 +// 0.300000 +0x34cd +// 0.500000 +0x3800 +// 1.000000 +0x3c00 +// 2.000000 +0x4000 +// 0.080714 +0x2d2a +// 1.000000 +0x3c00 +// 0.002715 +0x198f +// 0.260297 +0x342a +// 0.397001 +0x365a +// 0.017484 +0x247a +// 0.079640 +0x2d19 +// 0.484647 +0x37c1 +// 0.237652 +0x339b +// 0.265222 +0x343e +// 0.175248 +0x319c +// 0.935456 +0x3b7c +// 0.540078 +0x3852 +// 0.439466 +0x3708 
+// 0.206139 +0x3299 +// 0.263503 +0x3437 +// 0.422932 +0x36c4 +// 0.339491 +0x356f +// 0.138467 +0x306e +// 0.152947 +0x30e5 diff --git a/Testing/Patterns/DSP/FastMath/FastMathF16/Samples1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/Samples1_f16.txt new file mode 100755 index 00000000..295c0a4d --- /dev/null +++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Samples1_f16.txt @@ -0,0 +1,514 @@ +H +256 +// 0.075285 +0x2cd1 +// 0.013370 +0x22d8 +// 0.220545 +0x330f +// 0.267001 +0x3446 +// 0.409968 +0x368f +// 0.161784 +0x312d +// 0.249706 +0x33fe +// 0.088624 +0x2dac +// 0.541200 +0x3854 +// 0.180200 +0x31c4 +// 0.650979 +0x3935 +// 0.033328 +0x2844 +// 0.031771 +0x2811 +// 0.057002 +0x2b4c +// 0.413349 +0x369d +// 0.229898 +0x335b +// 0.206562 +0x329c +// 0.123441 +0x2fe6 +// 0.128610 +0x301e +// 0.091236 +0x2dd7 +// 0.516631 +0x3822 +// 0.067355 +0x2c50 +// 0.393951 +0x364e +// 0.429521 +0x36df +// 0.841992 +0x3abc +// 0.105719 +0x2ec4 +// 0.153100 +0x30e6 +// 0.035572 +0x288e +// 0.360258 +0x35c4 +// 0.620837 +0x38f7 +// 0.232809 +0x3373 +// 0.117602 +0x2f87 +// 0.155475 +0x30fa +// 0.608119 +0x38dd +// 0.199114 +0x325f +// 0.021425 +0x257c +// 0.126204 +0x300a +// 0.503326 +0x3807 +// 0.081538 +0x2d38 +// 0.195027 +0x323e +// 0.283564 +0x3489 +// 0.322678 +0x352a +// 0.088639 +0x2dac +// 0.175722 +0x31a0 +// 0.038783 +0x28f7 +// 0.032079 +0x281b +// 0.184972 +0x31eb +// 0.996148 +0x3bf8 +// 0.404061 +0x3677 +// 0.190127 +0x3216 +// 0.146331 +0x30af +// 0.465914 +0x3774 +// 0.245390 +0x33da +// 0.148020 +0x30bd +// 0.303123 +0x34da +// 0.311373 +0x34fb +// 0.006474 +0x1ea1 +// 0.405257 +0x367c +// 0.074887 +0x2ccb +// 0.345171 +0x3586 +// 0.089300 +0x2db7 +// 0.477313 +0x37a3 +// 0.168067 +0x3161 +// 0.001991 +0x1814 +// 0.102194 +0x2e8a +// 0.040519 +0x2930 +// 0.109390 +0x2f00 +// 0.206523 +0x329c +// 0.433665 +0x36f0 +// 0.391572 +0x3644 +// 0.153143 +0x30e7 +// 0.079262 +0x2d13 +// 0.098417 +0x2e4c +// 0.210884 +0x32c0 +// 0.326580 +0x353a +// 0.066347 
+0x2c3f +// 0.051448 +0x2a96 +// 0.003284 +0x1aba +// 0.270425 +0x3454 +// 0.833140 +0x3aaa +// 0.083060 +0x2d51 +// 0.159097 +0x3117 +// 0.023843 +0x261b +// 0.069603 +0x2c74 +// 0.351582 +0x35a0 +// 0.410820 +0x3693 +// 0.053182 +0x2acf +// 0.278520 +0x3475 +// 0.021779 +0x2593 +// 0.059720 +0x2ba5 +// 0.105113 +0x2eba +// 0.003746 +0x1bac +// 0.007042 +0x1f36 +// 0.785028 +0x3a48 +// 0.246007 +0x33df +// 0.326886 +0x353b +// 0.326374 +0x3539 +// 0.081496 +0x2d37 +// 0.058682 +0x2b83 +// 0.793882 +0x3a5a +// 0.354455 +0x35ac +// 0.145905 +0x30ab +// 0.134145 +0x304b +// 0.621248 +0x38f8 +// 0.086260 +0x2d85 +// 0.122030 +0x2fcf +// 0.338851 +0x356c +// 0.432520 +0x36ec +// 0.103489 +0x2ea0 +// 0.660566 +0x3949 +// 0.010646 +0x2173 +// 0.010899 +0x2195 +// 0.340560 +0x3573 +// 0.027039 +0x26ec +// 0.594168 +0x38c1 +// 0.260699 +0x342c +// 0.248825 +0x33f6 +// 0.115116 +0x2f5e +// 0.260917 +0x342d +// 0.176952 +0x31aa +// 0.005986 +0x1e21 +// 0.094150 +0x2e07 +// 0.490388 +0x37d9 +// 0.315517 +0x350c +// 0.008217 +0x2035 +// 0.509586 +0x3814 +// 0.278656 +0x3475 +// 0.152630 +0x30e2 +// 0.439919 +0x370a +// 0.161132 +0x3128 +// 0.793575 +0x3a59 +// 0.752534 +0x3a05 +// 0.139024 +0x3073 +// 0.197104 +0x324f +// 0.243939 +0x33ce +// 0.068302 +0x2c5f +// 0.118063 +0x2f8e +// 0.027900 +0x2724 +// 0.038606 +0x28f1 +// 0.120798 +0x2fbb +// 0.151979 +0x30dd +// 0.314126 +0x3507 +// 0.316658 +0x3511 +// 0.032271 +0x2821 +// 0.201590 +0x3273 +// 0.035311 +0x2885 +// 0.297552 +0x34c3 +// 0.074866 +0x2ccb +// 0.131284 +0x3033 +// 0.079725 +0x2d1a +// 0.311873 +0x34fd +// 0.040439 +0x292d +// 0.346817 +0x358d +// 0.211014 +0x32c1 +// 0.086209 +0x2d84 +// 0.104838 +0x2eb6 +// 0.060019 +0x2baf +// 0.365459 +0x35d9 +// 0.150804 +0x30d3 +// 0.087698 +0x2d9d +// 0.538126 +0x384e +// 0.090029 +0x2dc3 +// 0.466323 +0x3776 +// 0.288092 +0x349c +// 0.138977 +0x3073 +// 0.444435 +0x371c +// 0.186741 +0x31fa +// 0.459590 +0x375a +// 0.143353 +0x3096 +// 0.092331 +0x2de9 +// 0.247526 
+0x33ec +// 0.051821 +0x2aa2 +// 0.068639 +0x2c65 +// 0.567997 +0x388b +// 0.230195 +0x335e +// 0.523016 +0x382f +// 0.007948 +0x2012 +// 0.162678 +0x3135 +// 0.300927 +0x34d1 +// 0.007069 +0x1f3d +// 0.411752 +0x3697 +// 0.197711 +0x3254 +// 0.057256 +0x2b54 +// 0.182220 +0x31d5 +// 0.270759 +0x3455 +// 0.174742 +0x3197 +// 0.355136 +0x35af +// 0.056366 +0x2b37 +// 0.006381 +0x1e89 +// 0.103926 +0x2ea7 +// 0.279117 +0x3477 +// 0.140395 +0x307e +// 0.263080 +0x3436 +// 0.047484 +0x2a14 +// 0.142805 +0x3092 +// 0.594794 +0x38c2 +// 0.038113 +0x28e1 +// 0.455304 +0x3749 +// 0.327085 +0x353c +// 0.022741 +0x25d2 +// 0.205277 +0x3292 +// 0.017041 +0x245d +// 0.516604 +0x3822 +// 0.019929 +0x251a +// 0.071015 +0x2c8c +// 0.439260 +0x3707 +// 0.065380 +0x2c2f +// 0.268934 +0x344e +// 0.260440 +0x342b +// 0.283508 +0x3489 +// 0.411741 +0x3696 +// 0.090399 +0x2dc9 +// 0.173062 +0x318a +// 0.344605 +0x3584 +// 0.216728 +0x32ef +// 0.351305 +0x359f +// 0.118037 +0x2f8e +// 0.180886 +0x31ca +// 0.245716 +0x33dd +// 0.231932 +0x336c +// 0.414781 +0x36a3 +// 0.208593 +0x32ad +// 0.512164 +0x3819 +// 0.425380 +0x36ce +// 0.446249 +0x3724 +// 0.167936 +0x3160 +// 0.163364 +0x313a +// 0.150380 +0x30d0 +// 0.058932 +0x2b8b +// 1.000000 +0x3c00 +// 0.199019 +0x325e +// 0.179563 +0x31bf +// 0.121763 +0x2fcb +// 0.401939 +0x366e +// 0.508503 +0x3811 +// 0.296685 +0x34bf +// 0.033768 +0x2852 +// 0.312811 +0x3501 +// 0.679889 +0x3970 +// 0.112047 +0x2f2c +// 0.332766 +0x3553 +// 0.330595 +0x354a +// 0.230368 +0x335f +// 0.527939 +0x3839 +// 0.280736 +0x347e +// 0.309292 +0x34f3 +// 0.613890 +0x38e9 +// 0.358239 +0x35bb +// 0.121421 +0x2fc5 +// 0.113523 +0x2f44 +// 0.027774 +0x271c +// 0.078329 +0x2d03 +// 0.167647 +0x315d +// 0.598118 +0x38c9 +// 0.370506 +0x35ee +// 0.060933 +0x2bcd diff --git a/Testing/Patterns/DSP/FastMath/FastMathF16/Sin1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/Sin1_f16.txt new file mode 100755 index 00000000..5cf96240 --- /dev/null +++ 
b/Testing/Patterns/DSP/FastMath/FastMathF16/Sin1_f16.txt @@ -0,0 +1,48 @@ +H +23 +// 0.000000 +0x0 +// 0.707107 +0x39a8 +// 1.000000 +0x3c00 +// 0.707107 +0x39a8 +// 0.000000 +0x0 +// -0.707107 +0xb9a8 +// -1.000000 +0xbc00 +// -0.000001 +0x8011 +// -0.707107 +0xb9a8 +// -1.000000 +0xbc00 +// -0.707107 +0xb9a8 +// -0.000000 +0x8000 +// 0.707107 +0x39a8 +// 1.000000 +0x3c00 +// -0.000001 +0x8011 +// -0.000000 +0x8000 +// 0.707107 +0x39a8 +// 1.000000 +0x3c00 +// 0.707107 +0x39a8 +// 0.000000 +0x0 +// -0.707107 +0xb9a8 +// -1.000000 +0xbc00 +// -0.000001 +0x8011 diff --git a/Testing/Patterns/DSP/FastMath/FastMathF16/Sqrt1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/Sqrt1_f16.txt new file mode 100755 index 00000000..6e525066 --- /dev/null +++ b/Testing/Patterns/DSP/FastMath/FastMathF16/Sqrt1_f16.txt @@ -0,0 +1,18 @@ +H +8 +// 0.000000 +0x0 +// 0.000000 +0x0 +// 0.316228 +0x350f +// 1.000000 +0x3c00 +// 1.414214 +0x3da8 +// 1.732051 +0x3eee +// 1.870829 +0x3f7c +// 1.897367 +0x3f97 diff --git a/Testing/Patterns/DSP/FastMath/FastMathF16/SqrtInput1_f16.txt b/Testing/Patterns/DSP/FastMath/FastMathF16/SqrtInput1_f16.txt new file mode 100755 index 00000000..9226084b --- /dev/null +++ b/Testing/Patterns/DSP/FastMath/FastMathF16/SqrtInput1_f16.txt @@ -0,0 +1,18 @@ +H +8 +// -0.400000 +0xb666 +// 0.000000 +0x0 +// 0.100000 +0x2e66 +// 1.000000 +0x3c00 +// 2.000000 +0x4000 +// 3.000000 +0x4200 +// 3.500000 +0x4300 +// 3.600000 +0x4333 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/Dims22_s16.txt b/Testing/Patterns/DSP/Stats/StatsF16/Dims22_s16.txt new file mode 100755 index 00000000..b8c911f7 --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/Dims22_s16.txt @@ -0,0 +1,24 @@ +H +11 +// 10 +0x000A +// 3 +0x0003 +// 8 +0x0008 +// 9 +0x0009 +// 12 +0x000C +// 3 +0x0003 +// 8 +0x0008 +// 9 +0x0009 +// 12 +0x000C +// 3 +0x0003 +// 8 +0x0008 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/Dims23_s16.txt b/Testing/Patterns/DSP/Stats/StatsF16/Dims23_s16.txt new file mode 
100755 index 00000000..b8c911f7 --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/Dims23_s16.txt @@ -0,0 +1,24 @@ +H +11 +// 10 +0x000A +// 3 +0x0003 +// 8 +0x0008 +// 9 +0x0009 +// 12 +0x000C +// 3 +0x0003 +// 8 +0x0008 +// 9 +0x0009 +// 12 +0x000C +// 3 +0x0003 +// 8 +0x0008 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/Dims24_s16.txt b/Testing/Patterns/DSP/Stats/StatsF16/Dims24_s16.txt new file mode 100755 index 00000000..b8c911f7 --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/Dims24_s16.txt @@ -0,0 +1,24 @@ +H +11 +// 10 +0x000A +// 3 +0x0003 +// 8 +0x0008 +// 9 +0x0009 +// 12 +0x000C +// 3 +0x0003 +// 8 +0x0008 +// 9 +0x0009 +// 12 +0x000C +// 3 +0x0003 +// 8 +0x0008 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/Dims25_s16.txt b/Testing/Patterns/DSP/Stats/StatsF16/Dims25_s16.txt new file mode 100755 index 00000000..b8c911f7 --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/Dims25_s16.txt @@ -0,0 +1,24 @@ +H +11 +// 10 +0x000A +// 3 +0x0003 +// 8 +0x0008 +// 9 +0x0009 +// 12 +0x000C +// 3 +0x0003 +// 8 +0x0008 +// 9 +0x0009 +// 12 +0x000C +// 3 +0x0003 +// 8 +0x0008 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/Input22_f16.txt b/Testing/Patterns/DSP/Stats/StatsF16/Input22_f16.txt new file mode 100755 index 00000000..7313f06d --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/Input22_f16.txt @@ -0,0 +1,152 @@ +H +75 +// 0.322836 +0x352a +// 0.198525 +0x325a +// 0.478639 +0x37a9 +// 0.026143 +0x26b1 +// 0.098991 +0x2e56 +// 0.143166 +0x3095 +// 0.085541 +0x2d7a +// 0.068541 +0x2c63 +// 0.211153 +0x32c2 +// 0.181133 +0x31cc +// 0.185330 +0x31ee +// 0.089042 +0x2db3 +// 0.147745 +0x30ba +// 0.095482 +0x2e1c +// 0.040729 +0x2937 +// 0.148711 +0x30c2 +// 0.132218 +0x303b +// 0.141558 +0x3088 +// 0.075285 +0x2cd1 +// 0.129230 +0x3023 +// 0.111640 +0x2f25 +// 0.120751 +0x2fba +// 0.013239 +0x22c7 +// 0.060065 +0x2bb0 +// 0.127400 +0x3014 +// 0.094640 +0x2e0f +// 0.106302 +0x2ece +// 0.047992 +0x2a25 +// 0.051468 +0x2a97 +// 0.125120 
+0x3001 +// 0.120732 +0x2fba +// 0.020651 +0x2549 +// 0.144559 +0x30a0 +// 0.511255 +0x3817 +// 0.344187 +0x3582 +// 0.115065 +0x2f5d +// 0.068302 +0x2c5f +// 0.205347 +0x3292 +// 0.160452 +0x3122 +// 0.065289 +0x2c2e +// 0.127455 +0x3014 +// 0.248741 +0x33f6 +// 0.009349 +0x20c9 +// 0.100586 +0x2e70 +// 0.102405 +0x2e8e +// 0.188184 +0x3206 +// 0.192786 +0x322b +// 0.160000 +0x311f +// 0.027773 +0x271c +// 0.112430 +0x2f32 +// 0.001022 +0x142f +// 0.114814 +0x2f59 +// 0.056143 +0x2b30 +// 0.122610 +0x2fd9 +// 0.165753 +0x314e +// 0.135443 +0x3056 +// 0.019103 +0x24e4 +// 0.028083 +0x2730 +// 0.013344 +0x22d5 +// 0.165378 +0x314b +// 0.075133 +0x2ccf +// 0.056506 +0x2b3c +// 0.005057 +0x1d2e +// 0.157446 +0x310a +// 0.426391 +0x36d2 +// 0.165223 +0x314a +// 0.408385 +0x3689 +// 0.017004 +0x245a +// 0.296335 +0x34be +// 0.005781 +0x1deb +// 0.076763 +0x2cea +// 0.223549 +0x3327 +// 0.055634 +0x2b1f +// 0.222075 +0x331b +// 0.102857 +0x2e95 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/Input23_f16.txt b/Testing/Patterns/DSP/Stats/StatsF16/Input23_f16.txt new file mode 100755 index 00000000..49142501 --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/Input23_f16.txt @@ -0,0 +1,152 @@ +H +75 +// 0.475515 +0x379c +// 0.270052 +0x3452 +// 0.254433 +0x3412 +// 0.055055 +0x2b0c +// 0.202411 +0x327a +// 0.124266 +0x2ff4 +// 0.046322 +0x29ee +// 0.178920 +0x31ba +// 0.078939 +0x2d0d +// 0.111714 +0x2f26 +// 0.202373 +0x327a +// 0.187127 +0x31fd +// 0.154401 +0x30f1 +// 0.054258 +0x2af2 +// 0.123123 +0x2fe1 +// 0.099707 +0x2e62 +// 0.011955 +0x221f +// 0.166787 +0x3156 +// 0.088390 +0x2da8 +// 0.114251 +0x2f50 +// 0.070847 +0x2c89 +// 0.118218 +0x2f91 +// 0.057549 +0x2b5e +// 0.143263 +0x3096 +// 0.031234 +0x27ff +// 0.169478 +0x316c +// 0.045658 +0x29d8 +// 0.170969 +0x3179 +// 0.011664 +0x21f9 +// 0.043150 +0x2986 +// 0.056999 +0x2b4c +// 0.080973 +0x2d2f +// 0.301702 +0x34d4 +// 0.375561 +0x3602 +// 0.322737 +0x352a +// 0.069801 +0x2c78 +// 0.234393 +0x3380 +// 
0.188541 +0x3209 +// 0.233770 +0x337b +// 0.089282 +0x2db7 +// 0.019572 +0x2503 +// 0.075178 +0x2cd0 +// 0.089462 +0x2dba +// 0.116452 +0x2f74 +// 0.111320 +0x2f20 +// 0.215334 +0x32e4 +// 0.119770 +0x2faa +// 0.058668 +0x2b82 +// 0.204849 +0x328e +// 0.009485 +0x20db +// 0.158639 +0x3114 +// 0.005484 +0x1d9e +// 0.137894 +0x306a +// 0.154182 +0x30ef +// 0.008037 +0x201d +// 0.040326 +0x2929 +// 0.038281 +0x28e6 +// 0.082787 +0x2d4c +// 0.046186 +0x29e9 +// 0.118893 +0x2f9c +// 0.034195 +0x2860 +// 0.090605 +0x2dcc +// 0.156341 +0x3101 +// 0.092273 +0x2de8 +// 0.578286 +0x38a0 +// 0.049254 +0x2a4e +// 0.372461 +0x35f6 +// 0.021948 +0x259e +// 0.059545 +0x2b9f +// 0.138834 +0x3071 +// 0.220685 +0x3310 +// 0.062460 +0x2bff +// 0.201480 +0x3273 +// 0.087765 +0x2d9e +// 0.207282 +0x32a2 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/InputA24_f16.txt b/Testing/Patterns/DSP/Stats/StatsF16/InputA24_f16.txt new file mode 100755 index 00000000..bd93a482 --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/InputA24_f16.txt @@ -0,0 +1,152 @@ +H +75 +// 0.066000 +0x2c39 +// 0.029316 +0x2781 +// 0.904684 +0x3b3d +// 0.055167 +0x2b10 +// 0.210490 +0x32bc +// 0.148163 +0x30be +// 0.189833 +0x3213 +// 0.038896 +0x28fb +// 0.138513 +0x306f +// 0.022802 +0x25d6 +// 0.196137 +0x3247 +// 0.076794 +0x2cea +// 0.010108 +0x212d +// 0.220958 +0x3312 +// 0.129726 +0x3027 +// 0.099061 +0x2e57 +// 0.136545 +0x305f +// 0.232233 +0x336e +// 0.014392 +0x235e +// 0.080182 +0x2d22 +// 0.004332 +0x1c70 +// 0.083574 +0x2d59 +// 0.080585 +0x2d28 +// 0.117765 +0x2f89 +// 0.162459 +0x3133 +// 0.019641 +0x2507 +// 0.075399 +0x2cd3 +// 0.144758 +0x30a2 +// 0.064852 +0x2c27 +// 0.130529 +0x302d +// 0.026411 +0x26c3 +// 0.089694 +0x2dbe +// 0.345746 +0x3588 +// 0.286879 +0x3497 +// 0.367375 +0x35e1 +// 0.099391 +0x2e5c +// 0.081898 +0x2d3e +// 0.075065 +0x2cce +// 0.179149 +0x31bc +// 0.135742 +0x3058 +// 0.071606 +0x2c95 +// 0.174743 +0x3197 +// 0.182405 +0x31d6 +// 0.067165 +0x2c4c +// 0.071465 
+0x2c93 +// 0.128327 +0x301b +// 0.219746 +0x3308 +// 0.104638 +0x2eb2 +// 0.145188 +0x30a5 +// 0.010493 +0x215f +// 0.166402 +0x3153 +// 0.086577 +0x2d8a +// 0.134145 +0x304b +// 0.007296 +0x1f79 +// 0.070782 +0x2c88 +// 0.104541 +0x2eb1 +// 0.119510 +0x2fa6 +// 0.109169 +0x2efd +// 0.070861 +0x2c89 +// 0.052457 +0x2ab7 +// 0.013363 +0x22d7 +// 0.067024 +0x2c4a +// 0.142142 +0x308c +// 0.108710 +0x2ef5 +// 0.182145 +0x31d4 +// 0.359778 +0x35c2 +// 0.458077 +0x3754 +// 0.166456 +0x3154 +// 0.192489 +0x3229 +// 0.161820 +0x312e +// 0.183137 +0x31dc +// 0.038873 +0x28fa +// 0.103665 +0x2ea2 +// 0.103497 +0x2ea0 +// 0.050063 +0x2a68 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/InputA25_f16.txt b/Testing/Patterns/DSP/Stats/StatsF16/InputA25_f16.txt new file mode 100755 index 00000000..e78320d7 --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/InputA25_f16.txt @@ -0,0 +1,152 @@ +H +75 +// -0.959212 +0xbbac +// -0.670324 +0xb95d +// -2.232477 +0xc077 +// -1.387737 +0xbd8d +// -4.329000 +0xc454 +// -1.806354 +0xbf3a +// -2.749885 +0xc180 +// -1.850150 +0xbf67 +// -1.610971 +0xbe72 +// -3.493034 +0xc2fc +// -2.050449 +0xc01a +// -2.407527 +0xc0d1 +// -2.544094 +0xc117 +// -2.448739 +0xc0e6 +// -1.799126 +0xbf32 +// -1.731562 +0xbeed +// -1.709799 +0xbed7 +// -4.335314 +0xc456 +// -2.254958 +0xc083 +// -2.192057 +0xc062 +// -1.938937 +0xbfc1 +// -2.430444 +0xc0dc +// -4.417635 +0xc46b +// -2.415578 +0xc0d5 +// -2.538128 +0xc114 +// -3.305833 +0xc29d +// -3.357861 +0xc2b7 +// -2.316667 +0xc0a2 +// -2.270840 +0xc08b +// -2.387285 +0xc0c6 +// -2.154695 +0xc04f +// -2.140212 +0xc048 +// -1.657703 +0xbea1 +// -0.645126 +0xb929 +// -1.248875 +0xbcff +// -1.603551 +0xbe6a +// -2.633577 +0xc144 +// -1.835578 +0xbf58 +// -2.357818 +0xc0b7 +// -3.324743 +0xc2a6 +// -1.648598 +0xbe98 +// -2.037842 +0xc013 +// -2.109987 +0xc038 +// -1.195792 +0xbcc8 +// -2.659356 +0xc152 +// -1.980175 +0xbfec +// -2.161604 +0xc053 +// -1.794321 +0xbf2d +// -2.246154 +0xc07e +// -2.408418 
+0xc0d1 +// -3.953591 +0xc3e8 +// -6.752190 +0xc6c1 +// -2.878867 +0xc1c2 +// -2.157113 +0xc050 +// -2.660457 +0xc152 +// -2.162948 +0xc053 +// -3.034069 +0xc211 +// -2.095012 +0xc031 +// -4.019581 +0xc405 +// -1.844349 +0xbf61 +// -2.773310 +0xc18c +// -2.257637 +0xc084 +// -4.031211 +0xc408 +// -2.102431 +0xc034 +// -1.073346 +0xbc4b +// -0.878225 +0xbb07 +// -1.408060 +0xbda2 +// -2.406489 +0xc0d0 +// -1.872983 +0xbf7e +// -2.021809 +0xc00b +// -1.781448 +0xbf20 +// -1.769868 +0xbf14 +// -1.823862 +0xbf4c +// -2.119634 +0xc03d +// -4.550491 +0xc48d diff --git a/Testing/Patterns/DSP/Stats/StatsF16/InputB24_f16.txt b/Testing/Patterns/DSP/Stats/StatsF16/InputB24_f16.txt new file mode 100755 index 00000000..2eb02f80 --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/InputB24_f16.txt @@ -0,0 +1,152 @@ +H +75 +// 0.320653 +0x3521 +// 0.199694 +0x3264 +// 0.479652 +0x37ad +// 0.102236 +0x2e8b +// 0.105106 +0x2eba +// 0.201341 +0x3271 +// 0.132813 +0x3040 +// 0.088571 +0x2dab +// 0.058607 +0x2b80 +// 0.249883 +0x33ff +// 0.061443 +0x2bdd +// 0.071180 +0x2c8e +// 0.107576 +0x2ee3 +// 0.057274 +0x2b55 +// 0.105606 +0x2ec2 +// 0.152027 +0x30dd +// 0.103325 +0x2e9d +// 0.049270 +0x2a4e +// 0.220453 +0x330e +// 0.133290 +0x3044 +// 0.105547 +0x2ec1 +// 0.084060 +0x2d61 +// 0.137416 +0x3066 +// 0.030274 +0x27c0 +// 0.048675 +0x2a3b +// 0.127130 +0x3011 +// 0.046091 +0x29e6 +// 0.066165 +0x2c3c +// 0.067586 +0x2c53 +// 0.097630 +0x2e40 +// 0.072033 +0x2c9c +// 0.117393 +0x2f83 +// 0.453053 +0x3740 +// 0.081985 +0x2d3f +// 0.464962 +0x3770 +// 0.052770 +0x2ac1 +// 0.131843 +0x3038 +// 0.057721 +0x2b63 +// 0.152657 +0x30e3 +// 0.210732 +0x32be +// 0.038870 +0x28fa +// 0.265057 +0x343e +// 0.090349 +0x2dc8 +// 0.153684 +0x30eb +// 0.057884 +0x2b69 +// 0.057020 +0x2b4c +// 0.071140 +0x2c8e +// 0.261764 +0x3430 +// 0.015038 +0x23b3 +// 0.081591 +0x2d39 +// 0.262766 +0x3434 +// 0.039113 +0x2902 +// 0.156950 +0x3106 +// 0.095202 +0x2e18 +// 0.025249 +0x2677 +// 0.037328 +0x28c7 
+// 0.083821 +0x2d5d +// 0.104429 +0x2eaf +// 0.087592 +0x2d9b +// 0.114188 +0x2f4f +// 0.056203 +0x2b32 +// 0.004401 +0x1c82 +// 0.095480 +0x2e1c +// 0.139157 +0x3074 +// 0.294734 +0x34b7 +// 0.268507 +0x344c +// 0.436759 +0x36fd +// 0.009448 +0x20d6 +// 0.081745 +0x2d3b +// 0.168340 +0x3163 +// 0.148550 +0x30c1 +// 0.136539 +0x305f +// 0.036541 +0x28ad +// 0.253112 +0x340d +// 0.165724 +0x314e diff --git a/Testing/Patterns/DSP/Stats/StatsF16/InputB25_f16.txt b/Testing/Patterns/DSP/Stats/StatsF16/InputB25_f16.txt new file mode 100755 index 00000000..2ff588a9 --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/InputB25_f16.txt @@ -0,0 +1,152 @@ +H +75 +// -0.550708 +0xb868 +// -1.086194 +0xbc58 +// -2.430874 +0xc0dd +// -2.189178 +0xc061 +// -1.915799 +0xbfaa +// -1.850925 +0xbf67 +// -1.791474 +0xbf2a +// -1.795864 +0xbf2f +// -1.805296 +0xbf39 +// -3.914507 +0xc3d4 +// -2.609097 +0xc138 +// -2.791088 +0xc195 +// -1.657411 +0xbea1 +// -3.818442 +0xc3a3 +// -2.085128 +0xc02c +// -1.898179 +0xbf98 +// -5.553102 +0xc58e +// -1.827261 +0xbf4f +// -2.342852 +0xc0b0 +// -1.613762 +0xbe74 +// -2.609171 +0xc138 +// -1.941609 +0xbfc4 +// -3.429489 +0xc2dc +// -3.528022 +0xc30e +// -2.070397 +0xc024 +// -1.926029 +0xbfb4 +// -3.317714 +0xc2a3 +// -2.238273 +0xc07a +// -1.737780 +0xbef3 +// -3.121640 +0xc23e +// -4.527533 +0xc487 +// -2.446193 +0xc0e4 +// -2.637576 +0xc146 +// -1.176571 +0xbcb5 +// -0.474604 +0xb798 +// -3.033151 +0xc211 +// -1.835852 +0xbf58 +// -1.691092 +0xbec4 +// -1.805705 +0xbf39 +// -2.648123 +0xc14c +// -2.160886 +0xc052 +// -2.461351 +0xc0ec +// -1.718385 +0xbee0 +// -3.930567 +0xc3dc +// -2.625716 +0xc140 +// -1.569582 +0xbe47 +// -1.492665 +0xbdf8 +// -2.078453 +0xc028 +// -2.391468 +0xc0c8 +// -3.195368 +0xc264 +// -2.475005 +0xc0f3 +// -1.956577 +0xbfd4 +// -2.960791 +0xc1ec +// -2.717495 +0xc16f +// -2.590550 +0xc12e +// -2.850811 +0xc1b4 +// -2.901796 +0xc1ce +// -2.290420 +0xc095 +// -2.218488 +0xc070 +// -2.282453 +0xc091 +// -2.319957 
+0xc0a4 +// -2.432125 +0xc0dd +// -2.239578 +0xc07b +// -2.294428 +0xc097 +// -0.590827 +0xb8ba +// -1.218422 +0xbce0 +// -1.881018 +0xbf86 +// -1.877314 +0xbf82 +// -2.526120 +0xc10d +// -1.792010 +0xbf2b +// -1.845023 +0xbf61 +// -1.752139 +0xbf02 +// -2.295159 +0xc097 +// -3.509155 +0xc305 +// -1.928857 +0xbfb7 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/RefEntropy22_f16.txt b/Testing/Patterns/DSP/Stats/StatsF16/RefEntropy22_f16.txt new file mode 100755 index 00000000..c6fbbd8d --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/RefEntropy22_f16.txt @@ -0,0 +1,22 @@ +H +10 +// 1.038650 +0x3c28 +// 1.946771 +0x3fc9 +// 2.139348 +0x4047 +// 2.343983 +0x40b0 +// 0.989680 +0x3beb +// 1.881268 +0x3f86 +// 1.990067 +0x3ff6 +// 2.193480 +0x4063 +// 1.026660 +0x3c1b +// 1.720281 +0x3ee2 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/RefKL24_f16.txt b/Testing/Patterns/DSP/Stats/StatsF16/RefKL24_f16.txt new file mode 100755 index 00000000..7d2c5846 --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/RefKL24_f16.txt @@ -0,0 +1,22 @@ +H +10 +// 0.413470 +0x369e +// 0.394708 +0x3651 +// 0.582607 +0x38a9 +// 0.396809 +0x3659 +// 0.179323 +0x31bd +// 0.111712 +0x2f26 +// 0.515907 +0x3821 +// 0.325282 +0x3534 +// 0.039444 +0x290c +// 0.581119 +0x38a6 diff --git a/Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExp23_f16.txt b/Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExp23_f16.txt new file mode 100755 index 00000000..bcc850da --- /dev/null +++ b/Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExp23_f16.txt @@ -0,0 +1,22 @@ +H +10 +// 1.437131 +0x3dc0 +// 2.206211 +0x406a +// 2.309694 +0x409f +// 2.569599 +0x4124 +// 1.432431 +0x3dbb +// 2.207405 +0x406a +// 2.310898 +0x409f +// 2.569412 +0x4124 +// 1.455066 +0x3dd2 +// 2.207080 +0x406a diff --git a/Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExpDot25_f16.txt b/Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExpDot25_f16.txt new file mode 100755 index 00000000..79a361e9 --- /dev/null +++ 
b/Testing/Patterns/DSP/Stats/StatsF16/RefLogSumExpDot25_f16.txt @@ -0,0 +1,22 @@ +H +10 +// -0.908800 +0xbb45 +// -1.999736 +0xc000 +// -2.257576 +0xc084 +// -2.442263 +0xc0e2 +// -1.038944 +0xbc28 +// -2.090152 +0xc02e +// -2.287087 +0xc093 +// -2.463247 +0xc0ed +// -1.051243 +0xbc34 +// -2.074013 +0xc026 diff --git a/Testing/Source/Tests/FastMathF16.cpp b/Testing/Source/Tests/FastMathF16.cpp new file mode 100755 index 00000000..ba91fc0f --- /dev/null +++ b/Testing/Source/Tests/FastMathF16.cpp @@ -0,0 +1,243 @@ +#include "FastMathF16.h" +#include +#include "Error.h" +#include "arm_vec_math_f16.h" +#include "Test.h" + + +#define SNR_THRESHOLD 60 +#define SNR_LOG_THRESHOLD 40 + +/* + +Reference patterns are generated with +a double precision computation. + +*/ +#define REL_ERROR (1.0e-3) +#define ABS_ERROR (1.0e-3) + +#define REL_LOG_ERROR (3.0e-2) +#define ABS_LOG_ERROR (3.0e-2) + +#if 0 + void FastMathF16::test_cos_f16() + { + const float16_t *inp = input.ptr(); + float16_t *outp = output.ptr(); + unsigned long i; + + for(i=0; i < ref.nbSamples(); i++) + { + outp[i]=arm_cos_f16(inp[i]); + } + + ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD); + ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR); + + } + + void FastMathF16::test_sin_f16() + { + const float16_t *inp = input.ptr(); + float16_t *outp = output.ptr(); + unsigned long i; + + for(i=0; i < ref.nbSamples(); i++) + { + outp[i]=arm_sin_f16(inp[i]); + } + + ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD); + ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR); + + } + +#endif + + void FastMathF16::test_sqrt_f16() + { + const float16_t *inp = input.ptr(); + float16_t *outp = output.ptr(); + arm_status status; + unsigned long i; + + for(i=0; i < ref.nbSamples(); i++) + { + status=arm_sqrt_f16(inp[i],&outp[i]); + ASSERT_TRUE((status == ARM_MATH_SUCCESS) || ((inp[i] < 0.0f) && (status == ARM_MATH_ARGUMENT_ERROR))); + } + + + ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD); + 
ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR); + + + } + + void FastMathF16::test_vlog_f16() + { + const float16_t *inp = input.ptr(); + float16_t *outp = output.ptr(); + + arm_vlog_f16(inp,outp,ref.nbSamples()); + + ASSERT_SNR(ref,output,(float16_t)SNR_LOG_THRESHOLD); + ASSERT_CLOSE_ERROR(ref,output,ABS_LOG_ERROR,REL_LOG_ERROR); + ASSERT_EMPTY_TAIL(output); + + } + + void FastMathF16::test_vexp_f16() + { + const float16_t *inp = input.ptr(); + float16_t *outp = output.ptr(); + + arm_vexp_f16(inp,outp,ref.nbSamples()); + + ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR); + ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD); + ASSERT_EMPTY_TAIL(output); + + } + + void FastMathF16::test_inverse_f16() + { + const float16_t *inp = input.ptr(); + + float16_t *outp = output.ptr(); + + arm_vinverse_f16(inp,outp,ref.nbSamples()); + + ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR); + ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD); + ASSERT_EMPTY_TAIL(output); + + } + + + void FastMathF16::setUp(Testing::testID_t id,std::vector& paramsArgs,Client::PatternMgr *mgr) + { + (void)paramsArgs; + switch(id) + { +#if 0 + case FastMathF16::TEST_COS_F16_1: + { + input.reload(FastMathF16::ANGLES1_F16_ID,mgr); + ref.reload(FastMathF16::COS1_F16_ID,mgr); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + + case FastMathF16::TEST_SIN_F16_2: + { + input.reload(FastMathF16::ANGLES1_F16_ID,mgr); + ref.reload(FastMathF16::SIN1_F16_ID,mgr); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; +#endif + + case FastMathF16::TEST_SQRT_F16_3: + { + input.reload(FastMathF16::SQRTINPUT1_F16_ID,mgr); + ref.reload(FastMathF16::SQRT1_F16_ID,mgr); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + + case FastMathF16::TEST_VLOG_F16_4: + { + input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr); + ref.reload(FastMathF16::LOG1_F16_ID,mgr); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + + case 
FastMathF16::TEST_VLOG_F16_5: + { + input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr,7); + ref.reload(FastMathF16::LOG1_F16_ID,mgr,7); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + + case FastMathF16::TEST_VLOG_F16_6: + { + input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr,16); + ref.reload(FastMathF16::LOG1_F16_ID,mgr,16); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + + case FastMathF16::TEST_VLOG_F16_7: + { + input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr,23); + ref.reload(FastMathF16::LOG1_F16_ID,mgr,23); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + + case FastMathF16::TEST_VEXP_F16_8: + { + + input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr); + ref.reload(FastMathF16::EXP1_F16_ID,mgr); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + + case FastMathF16::TEST_VEXP_F16_9: + { + input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr,7); + ref.reload(FastMathF16::EXP1_F16_ID,mgr,7); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + + case FastMathF16::TEST_VEXP_F16_10: + { + input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr,16); + ref.reload(FastMathF16::EXP1_F16_ID,mgr,16); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + + case FastMathF16::TEST_VEXP_F16_11: + { + input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr,23); + ref.reload(FastMathF16::EXP1_F16_ID,mgr,23); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + + case FastMathF16::TEST_INVERSE_F16_12: + { + input.reload(FastMathF16::INPUT1_F16_ID,mgr); + ref.reload(FastMathF16::INVERSE1_F16_ID,mgr); + output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr); + + } + break; + } + + } + + void FastMathF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr) + { + (void)id; + output.dump(mgr); + + } diff --git a/Testing/Source/Tests/StatsTestsF16.cpp b/Testing/Source/Tests/StatsTestsF16.cpp index 909651df..1887a860 
100755 --- a/Testing/Source/Tests/StatsTestsF16.cpp +++ b/Testing/Source/Tests/StatsTestsF16.cpp @@ -4,14 +4,18 @@ #include "Test.h" -#define SNR_THRESHOLD 50 +#define SNR_THRESHOLD 48 +#define SNR_KULLBACK_THRESHOLD 40 /* Reference patterns are generated with a double precision computation. */ -#define REL_ERROR (3.0e-3) +#define REL_ERROR (6.0e-3) + +#define REL_KULLBACK_ERROR (5.0e-3) +#define ABS_KULLBACK_ERROR (5.0e-3) void StatsTestsF16::test_max_f16() { @@ -39,7 +43,7 @@ a double precision computation. } -#if 0 + void StatsTestsF16::test_max_no_idx_f16() { const float16_t *inp = inputA.ptr(); @@ -59,7 +63,7 @@ a double precision computation. ASSERT_EQ(result,refp[this->refOffset]); } -#endif + void StatsTestsF16::test_min_f16() { @@ -241,7 +245,7 @@ a double precision computation. } -#if 0 + void StatsTestsF16::test_entropy_f16() { const float16_t *inp = inputA.ptr(); @@ -298,9 +302,9 @@ a double precision computation. inpB += dimsp[i+1]; } - ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD); + ASSERT_SNR(ref,output,(float16_t)SNR_KULLBACK_THRESHOLD); - ASSERT_REL_ERROR(ref,output,REL_ERROR); + ASSERT_CLOSE_ERROR(ref,output,ABS_KULLBACK_ERROR,REL_KULLBACK_ERROR); } void StatsTestsF16::test_logsumexp_dot_prod_f16() @@ -325,7 +329,7 @@ a double precision computation. ASSERT_REL_ERROR(ref,output,REL_ERROR); } - #endif + void StatsTestsF16::setUp(Testing::testID_t id,std::vector& paramsArgs,Client::PatternMgr *mgr) { @@ -595,7 +599,7 @@ a double precision computation. refOffset = 2; } break; -#if 0 + case StatsTestsF16::TEST_ENTROPY_F16_22: { inputA.reload(StatsTestsF16::INPUT22_F16_ID,mgr); @@ -651,7 +655,7 @@ a double precision computation. case StatsTestsF16::TEST_MAX_NO_IDX_F16_26: { - inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,3); + inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,7); ref.reload(StatsTestsF16::MAXVALS_F16_ID,mgr); @@ -684,7 +688,7 @@ a double precision computation. 
refOffset = 2; } break; -#endif + case TEST_MEAN_F16_29: inputA.reload(StatsTestsF16::INPUT2_F16_ID,mgr,100); diff --git a/Testing/desc_f16.txt b/Testing/desc_f16.txt index b5f27502..21635bf4 100755 --- a/Testing/desc_f16.txt +++ b/Testing/desc_f16.txt @@ -25,23 +25,23 @@ group Root { Pattern STDVALS_F16_ID : StdVals6_f16.txt Pattern VARVALS_F16_ID : VarVals7_f16.txt - //Pattern INPUT22_F16_ID : Input22_f16.txt - //Pattern DIM22_S16_ID : Dims22_s16.txt - //Pattern REF22_ENTROPY_F16_ID : RefEntropy22_f16.txt - // - //Pattern INPUT23_F16_ID : Input23_f16.txt - //Pattern DIM23_S16_ID : Dims23_s16.txt - //Pattern REF23_LOGSUMEXP_F16_ID : RefLogSumExp23_f16.txt - // - //Pattern INPUTA24_F16_ID : InputA24_f16.txt - //Pattern INPUTB24_F16_ID : InputB24_f16.txt - //Pattern DIM24_S16_ID : Dims24_s16.txt - //Pattern REF24_KL_F16_ID : RefKL24_f16.txt - // - //Pattern INPUTA25_F16_ID : InputA25_f16.txt - //Pattern INPUTB25_F16_ID : InputB25_f16.txt - //Pattern DIM25_S16_ID : Dims25_s16.txt - //Pattern REF25_LOGSUMEXP_DOT_F16_ID : RefLogSumExpDot25_f16.txt + Pattern INPUT22_F16_ID : Input22_f16.txt + Pattern DIM22_S16_ID : Dims22_s16.txt + Pattern REF22_ENTROPY_F16_ID : RefEntropy22_f16.txt + + Pattern INPUT23_F16_ID : Input23_f16.txt + Pattern DIM23_S16_ID : Dims23_s16.txt + Pattern REF23_LOGSUMEXP_F16_ID : RefLogSumExp23_f16.txt + + Pattern INPUTA24_F16_ID : InputA24_f16.txt + Pattern INPUTB24_F16_ID : InputB24_f16.txt + Pattern DIM24_S16_ID : Dims24_s16.txt + Pattern REF24_KL_F16_ID : RefKL24_f16.txt + + Pattern INPUTA25_F16_ID : InputA25_f16.txt + Pattern INPUTB25_F16_ID : InputB25_f16.txt + Pattern DIM25_S16_ID : Dims25_s16.txt + Pattern REF25_LOGSUMEXP_DOT_F16_ID : RefLogSumExpDot25_f16.txt Output OUT_F16_ID : Output Output OUT_S16_ID : Index @@ -76,14 +76,14 @@ group Root { Test nb=8n arm_var_f16:test_var_f16 Test nb=8n+1 arm_var_f16:test_var_f16 - disabled{arm_entropy_f16:test_entropy_f16} - disabled{arm_logsumexp_f16:test_logsumexp_f16} - 
disabled{arm_kullback_leibler_f16:test_kullback_leibler_f16} - disabled{arm_logsumexp_dot_prod_f16:test_logsumexp_dot_prod_f16} + arm_entropy_f16:test_entropy_f16 + arm_logsumexp_f16:test_logsumexp_f16 + arm_kullback_leibler_f16:test_kullback_leibler_f16 + arm_logsumexp_dot_prod_f16:test_logsumexp_dot_prod_f16 - disabled{Test nb=7 arm_max_no_idx_f16:test_max_no_idx_f16} - disabled{Test nb=8n arm_max_no_idx_f16:test_max_no_idx_f16} - disabled{Test nb=8n+1 arm_max_no_idx_f16:test_max_no_idx_f16} + Test nb=7 arm_max_no_idx_f16:test_max_no_idx_f16 + Test nb=8n arm_max_no_idx_f16:test_max_no_idx_f16 + Test nb=8n+1 arm_max_no_idx_f16:test_max_no_idx_f16 Test long arm_mean_f16:test_mean_f16 Test long arm_rms_f16:test_rms_f16 @@ -342,6 +342,46 @@ group Root { } + group Fast Maths Tests { + class = FastMath + folder = FastMath + + suite Fast Maths F16 { + class = FastMathF16 + folder = FastMathF16 + + Pattern ANGLES1_F16_ID : Angles1_f16.txt + Pattern SQRTINPUT1_F16_ID : SqrtInput1_f16.txt + Pattern LOGINPUT1_F16_ID : LogInput1_f16.txt + Pattern EXPINPUT1_F16_ID : ExpInput1_f16.txt + Pattern INPUT1_F16_ID : Samples1_f16.txt + + Pattern COS1_F16_ID : Cos1_f16.txt + Pattern SIN1_F16_ID : Sin1_f16.txt + Pattern SQRT1_F16_ID : Sqrt1_f16.txt + Pattern LOG1_F16_ID : Log1_f16.txt + Pattern EXP1_F16_ID : Exp1_f16.txt + Pattern INVERSE1_F16_ID : Inverse1_f16.txt + + Output OUT_F16_ID : Output + + Functions { + disabled{test_cos_f16:test_cos_f16} + disabled{test_sin_f16:test_sin_f16} + test_sqrt_f16:test_sqrt_f16 + test_vlog_f16:test_vlog_f16 + test_vlog_f16 nb=7:test_vlog_f16 + test_vlog_f16 nb=8n:test_vlog_f16 + test_vlog_f16 nb=8n+7:test_vlog_f16 + test_vexp_f16:test_vexp_f16 + test_vexp_f16 nb=7:test_vexp_f16 + test_vexp_f16 nb=8n:test_vexp_f16 + test_vexp_f16 nb=8n+7:test_vexp_f16 + test_inverse_f16:test_inverse_f16 + } + } + } + group Filtering Tests { class = FilteringTests folder = Filtering