CMSIS-DSP: Added additional f16 statistics functions

and the required f16 fast math functions.
pull/19/head
Christophe Favergeon 6 years ago
parent 534c34f883
commit 55c9be8af0

@ -113,6 +113,14 @@ extern "C"
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
#if !defined(__CC_ARM) && defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Eight polynomial coefficients passed by vexpq_f16 to vtaylor_polyq_f16. */
extern const float16_t exp_tab_f16[8];
/* Eight polynomial coefficients read by vlogq_f16 for the mantissa part of the logarithm. */
extern const float16_t __logf_lut_f16[8];
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */
#endif
#ifdef __cplusplus
}

@ -140,7 +140,7 @@ won't be built.
/* NOTE(review): FLT_MAX is far larger than the largest finite half-precision
 * value, so this conversion presumably ends up as +infinity rather than the
 * largest finite float16_t - confirm that this is what callers expect. */
#define F16_ABSMAX ((float16_t)FLT_MAX)
#define F16_ABSMIN ((float16_t)0.0)
/* Positive infinity in half precision.
 * The former definition, (float16_t)0x07c00, was a value conversion of the
 * integer 31744 (the bit pattern of +inf), which yields the finite number
 * 31744.0 and not infinity. */
#define F16INFINITY ((float16_t)__builtin_inf())
#endif /* ARM_FLOAT16_SUPPORTED*/
#endif /* !defined( __CC_ARM ) */

@ -0,0 +1,232 @@
/******************************************************************************
* @file arm_vec_math_f16.h
* @brief Public header file for CMSIS DSP Library
******************************************************************************/
/*
* Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * Include guard dedicated to the f16 vector math header. It must not reuse
 * the guard name of the f32 vector math header (arm_vec_math.h, which is
 * guarded by _ARM_VEC_MATH_H): with a shared name, whichever of the two
 * headers is included second would be skipped entirely.
 */
#ifndef _ARM_VEC_MATH_F16_H
#define _ARM_VEC_MATH_F16_H
#include "arm_math_types_f16.h"
#include "arm_common_tables_f16.h"
#include "arm_helium_utils.h"
#ifdef __cplusplus
extern "C"
{
#endif
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
/* ln(2): vlogq_f16 multiplies the unbiased exponent by this value to add the exponent's contribution to the logarithm. */
static const float16_t __logf_rng_f16=0.693147180f16;
/*
 * Fast reciprocal approximation of each lane of x.
 *
 * A first estimate is built by manipulating the bit pattern of |x| and is
 * then refined by four Newton-Raphson steps. Lanes where x compares equal to
 * zero are set to F16INFINITY, and the result is negated on lanes where x is
 * negative.
 */
__STATIC_INLINE f16x8_t vrecip_hiprec_f16(
f16x8_t x)
{
q15x8_t m;
f16x8_t b;
any16x8_t xinv;
/* Work on |x|; the sign is re-applied at the end. */
f16x8_t ax = vabsq(x);
xinv.f = ax;
/* 0x07c00 masks the half-precision exponent field and 0x03c00 is the bit
 * pattern of 1.0, so m is the offset between the exponent of 1.0 and the
 * exponent of |x|. */
m = 0x03c00 - (xinv.i & 0x07c00);
/* Adding m to the bit pattern replaces the exponent by that of 1.0
 * (presumably only meaningful for normal, finite inputs). */
xinv.i = xinv.i + m;
/* Linear first guess of the reciprocal of the rescaled value. */
xinv.f = 1.41176471f16 - 0.47058824f16 * xinv.f;
/* Apply the same exponent offset to the guess so that it approximates 1/|x|. */
xinv.i = xinv.i + m;
/* Four Newton-Raphson refinements: xinv <- xinv * (2 - xinv * |x|). */
b = 2.0f16 - xinv.f * ax;
xinv.f = xinv.f * b;
b = 2.0f16 - xinv.f * ax;
xinv.f = xinv.f * b;
b = 2.0f16 - xinv.f * ax;
xinv.f = xinv.f * b;
b = 2.0f16 - xinv.f * ax;
xinv.f = xinv.f * b;
/* Lanes where the input is zero get F16INFINITY. */
xinv.f = vdupq_m(xinv.f, F16INFINITY, vcmpeqq(x, 0.0f));
/*
 * restore sign
 */
xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
return xinv.f;
}
/**
  @brief         Lane-wise division of two f16 vectors.
  @param[in]     num  vector of numerators
  @param[in]     den  vector of denominators
  @return        num multiplied by the approximate reciprocal of den
                 computed by vrecip_hiprec_f16
 */
__STATIC_INLINE f16x8_t vdiv_f16(
    f16x8_t num, f16x8_t den)
{
    const f16x8_t denInv = vrecip_hiprec_f16(den);

    return vmulq(num, denInv);
}
/**
  @brief         Half-precision degree-7 polynomial evaluation.

  The eight coefficients are combined pairwise first and the partial results
  are then merged with x^2 and x^4:

      (c[0] + c[4]*x) + (c[2] + c[6]*x) * x^2
    + ((c[1] + c[5]*x) + (c[3] + c[7]*x) * x^2) * x^4

  @param[in]     x       f16 vector input
  @param[in]     coeffs  pointer to 8 f16 coefficients in the order shown above
  @return        f16 vector holding the polynomial value for each lane
 */
__STATIC_INLINE float16x8_t vtaylor_polyq_f16(
    float16x8_t x,
    const float16_t * coeffs)
{
    /* First-degree pieces, each of the form coeffs[hi] * x + coeffs[lo]. */
    const float16x8_t deg01 = vfmasq(vdupq_n_f16(coeffs[4]), x, coeffs[0]);
    const float16x8_t deg23 = vfmasq(vdupq_n_f16(coeffs[6]), x, coeffs[2]);
    const float16x8_t deg45 = vfmasq(vdupq_n_f16(coeffs[5]), x, coeffs[1]);
    const float16x8_t deg67 = vfmasq(vdupq_n_f16(coeffs[7]), x, coeffs[3]);

    const float16x8_t xSquared = vmulq(x, x);
    const float16x8_t xFourth = vmulq(xSquared, xSquared);

    /* Merge the pieces: low half, high half, then the final combination. */
    const float16x8_t lowHalf = vfmaq_f16(deg01, deg23, xSquared);
    const float16x8_t highHalf = vfmaq_f16(deg45, deg67, xSquared);

    return vfmaq(lowHalf, highHalf, xFourth);
}
/**
  @brief         Split each lane into an exponent and a rescaled value.

  The bit pattern of each lane is shifted right by 10 bits and 15 is
  subtracted to obtain an integer exponent; that exponent, shifted back into
  place, is then removed from the bit pattern.
  NOTE(review): no special handling of negative, zero or subnormal inputs is
  visible here - confirm that callers only rely on positive normal values.

  @param[in]     x  f16 vector input
  @param[out]    e  receives the per-lane integer exponent
  @return        f16 vector whose bit pattern has the exponent removed
 */
__STATIC_INLINE float16x8_t vmant_exp_f16(
    float16x8_t x,
    int16x8_t * e)
{
    any16x8_t bits;
    int16x8_t unbiased;

    bits.f = x;
    unbiased = (bits.i >> 10) - 15;
    bits.i -= (unbiased << 10);

    *e = unbiased;
    return bits.f;
}
/*
 * Vectorized logarithm approximation.
 *
 * Each lane is split by vmant_exp_f16 into an integer exponent and a rescaled
 * value. A degree-7 polynomial with coefficients from __logf_lut_f16 is
 * evaluated on the rescaled value, and the exponent multiplied by
 * __logf_rng_f16 is added to it. Lanes where the input compares equal to zero
 * are set to -F16INFINITY.
 */
__STATIC_INLINE float16x8_t vlogq_f16(float16x8_t vecIn)
{
q15x8_t vecExpUnBiased;
float16x8_t vecTmpFlt0, vecTmpFlt1;
float16x8_t vecAcc0, vecAcc1, vecAcc2, vecAcc3;
float16x8_t vecExpUnBiasedFlt;
/*
 * extract exponent
 */
vecTmpFlt1 = vmant_exp_f16(vecIn, &vecExpUnBiased);
/* xx = r.f * r.f, used below to merge the first-degree pieces */
vecTmpFlt0 = vecTmpFlt1 * vecTmpFlt1;
/*
 * a = (__logf_lut_f16[4] * r.f) + (__logf_lut_f16[0]);
 */
vecAcc0 = vdupq_n_f16(__logf_lut_f16[0]);
vecAcc0 = vfmaq(vecAcc0, vecTmpFlt1, __logf_lut_f16[4]);
/*
 * b = (__logf_lut_f16[6] * r.f) + (__logf_lut_f16[2]);
 */
vecAcc1 = vdupq_n_f16(__logf_lut_f16[2]);
vecAcc1 = vfmaq(vecAcc1, vecTmpFlt1, __logf_lut_f16[6]);
/*
 * c = (__logf_lut_f16[5] * r.f) + (__logf_lut_f16[1]);
 */
vecAcc2 = vdupq_n_f16(__logf_lut_f16[1]);
vecAcc2 = vfmaq(vecAcc2, vecTmpFlt1, __logf_lut_f16[5]);
/*
 * d = (__logf_lut_f16[7] * r.f) + (__logf_lut_f16[3]);
 */
vecAcc3 = vdupq_n_f16(__logf_lut_f16[3]);
vecAcc3 = vfmaq(vecAcc3, vecTmpFlt1, __logf_lut_f16[7]);
/*
 * a = a + b * xx;
 */
vecAcc0 = vfmaq(vecAcc0, vecAcc1, vecTmpFlt0);
/*
 * c = c + d * xx;
 */
vecAcc2 = vfmaq(vecAcc2, vecAcc3, vecTmpFlt0);
/*
 * xx = xx * xx;
 */
vecTmpFlt0 = vecTmpFlt0 * vecTmpFlt0;
/* integer exponent converted to half precision for the final accumulation */
vecExpUnBiasedFlt = vcvtq_f16_s16(vecExpUnBiased);
/*
 * r.f = a + c * xx;
 */
vecAcc0 = vfmaq(vecAcc0, vecAcc2, vecTmpFlt0);
/*
 * add exponent
 * r.f = r.f + ((float16_t) m) * __logf_rng_f16;
 */
vecAcc0 = vfmaq(vecAcc0, vecExpUnBiasedFlt, __logf_rng_f16);
/* lanes where the input is zero are forced to -F16INFINITY */
vecAcc0 = vdupq_m(vecAcc0, -F16INFINITY, vcmpeqq(vecIn, 0.0f));
return vecAcc0;
}
/**
  @brief         Vectorized exponential approximation.

  The input is written as m * ln(2) + r with m an integer; a polynomial with
  coefficients from exp_tab_f16 is evaluated on r and the result is scaled by
  2^m by adding m to the exponent field of the half-precision bit pattern.
  Lanes with m below -14 are set to zero.

  @param[in]     x  f16 vector input
  @return        f16 vector approximating exp(x) for each lane
 */
__STATIC_INLINE float16x8_t vexpq_f16(
    float16x8_t x)
{
    /* Range reduction: m = integer part of x / ln(2), r = x - m * ln(2). */
    const float16x8_t xOverLn2 = vmulq_n_f16(x, 1.4426950408f16);
    int16x8_t m = vcvtq_s16_f16(xOverLn2);
    const float16x8_t mAsFloat = vcvtq_f16_s16(m);
    float16x8_t reduced = vfmsq_f16(x, mAsFloat, vdupq_n_f16(0.6931471805f16));

    /* Polynomial approximation on the reduced argument. */
    float16x8_t poly = vtaylor_polyq_f16(reduced, exp_tab_f16);

    /* Reconstruction: saturating add of (m << 10) to the bit pattern. */
    int16x8_t exponentBits = vqshlq_n_s16(m, 10);
    int16x8_t resultBits = vqaddq_s16((int16x8_t) (poly), exponentBits);

    poly = (float16x8_t) (resultBits);

    /* Results whose exponent would fall below -14 are replaced by zero. */
    poly = vdupq_m(poly, 0.0f, vcmpltq_n_s16(m, -14));
    return poly;
}
/**
  @brief         Raise each lane of a vector to an integer power by repeated
                 multiplication.
  @param[in]     x   f16 vector input
  @param[in]     nb  exponent; x is multiplied by itself nb - 1 times
                     (NOTE(review): presumably callers pass nb >= 1, since no
                     zero or negative power is computed here)
  @return        f16 vector holding x^nb for each lane
 */
__STATIC_INLINE float16x8_t arm_vec_exponent_f16(float16x8_t x, int16_t nb)
{
    float16x8_t power = x;

    for (nb--; nb > 0; nb--)
    {
        power = vmulq(power, x);
    }

    return (power);
}
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)*/
#endif /* ARM FLOAT16 SUPPORTED */
/*
 * The extern "C" block is opened before the ARM_FLOAT16_SUPPORTED
 * conditional, so it has to be closed after that conditional ends. Closing it
 * inside the conditional left a C++ build without float16 support with an
 * unterminated linkage specification.
 */
#ifdef __cplusplus
}
#endif
#endif /* include guard */
/**
*
* End of file.
*/

@ -36,7 +36,6 @@ extern "C"
#include "dsp/none.h"
#include "dsp/utils.h"
#include "dsp/fast_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)

@ -31,6 +31,8 @@
#include "dsp/none.h"
#include "dsp/utils.h"
/* For sqrt_f32 */
#include "dsp/fast_math_functions.h"
#ifdef __cplusplus
@ -69,6 +71,42 @@ __STATIC_FORCEINLINE arm_status arm_sqrt_f16(
@} end of SQRT group
*/
/**
@brief Natural logarithm of each element of a half-precision vector.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_vlog_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t blockSize);
/**
@brief Exponential of each element of a half-precision vector.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_vexp_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t blockSize);
/**
@brief Reciprocal (1 / x) of each element of a half-precision vector.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_vinverse_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t blockSize);
#endif /*defined(ARM_FLOAT16_SUPPORTED)*/
#ifdef __cplusplus
}

@ -123,6 +123,65 @@ extern "C"
float16_t * pResult,
uint32_t * pIndex);
/**
* @brief Entropy
*
* @param[in] pSrcA Array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return Entropy -Sum(p ln p)
*
*/
float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize);
/**
* @brief Computation of the LogSumExp
*
* For input values x1...xn the function computes ln(exp(x1) + ... + exp(xn)).
* The maximum xm of the values is extracted first and the result is evaluated
* as xm + ln(exp(x1 - xm) + ... + exp(xn - xm)).
*
* @param[in] in Pointer to an array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return LogSumExp
*
*/
float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize);
/**
* @brief Dot product with log arithmetic
*
* Vectors are containing the log of the samples
*
* @param[in] pSrcA points to the first input vector
* @param[in] pSrcB points to the second input vector
* @param[in] blockSize number of samples in each vector
* @param[in] pTmpBuffer temporary buffer of length blockSize
* @return The log of the dot product.
*
*/
float16_t arm_logsumexp_dot_prod_f16(const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t blockSize,
float16_t *pTmpBuffer);
/**
* @brief Kullback-Leibler
*
* @param[in] pSrcA Pointer to an array of input values for probability distribution A.
* @param[in] pSrcB Pointer to an array of input values for probability distribution B.
* @param[in] blockSize Number of samples in the input array.
* @return Kullback-Leibler Divergence D(A || B)
*
*/
float16_t arm_kullback_leibler_f16(const float16_t * pSrcA
,const float16_t * pSrcB
,uint32_t blockSize);
/**
@brief Maximum value of a floating-point vector, without returning its index.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
void arm_max_no_idx_f16(
const float16_t *pSrc,
uint32_t blockSize,
float16_t *pResult);
#endif /*defined(ARM_FLOAT16_SUPPORTED)*/
#ifdef __cplusplus

@ -12550,6 +12550,32 @@ const float16_t twiddleCoefF16_rfft_4096[4096] = {
#endif /*!defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)*/
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Polynomial coefficients used by vexpq_f16 through vtaylor_polyq_f16.
 * They are stored in the order that vtaylor_polyq_f16 reads them: entries
 * 0..7 are the coefficients of x^0, x^4, x^2, x^6, x^1, x^5, x^3 and x^7.
 * The values are close to the Taylor coefficients 1/k! of exp(x).
 */
const float16_t exp_tab_f16[8] = {
(1.f16), /* x^0 */
(0.0416598916054f16), /* x^4 */
(0.500000596046f16), /* x^2 */
(0.00138889f16), /* x^6 */
(1.00000011921f16), /* x^1 */
(0.00833693705499f16), /* x^5 */
(0.166665703058f16), /* x^3 */
(0.000195780929062f16), /* x^7 */
};
/*
 * Polynomial coefficients read by vlogq_f16. pN is the coefficient of the
 * N-th power of the polynomial argument; the storage order
 * (p0, p4, p2, p6, p1, p5, p3, p7) matches the pairs that vlogq_f16 combines:
 * [0] with [4], [2] with [6], [1] with [5] and [3] with [7].
 */
const float16_t __logf_lut_f16[8] = {
-2.295614848256274f16, /*p0*/
-2.470711633419806f16, /*p4*/
-5.686926051100417f16, /*p2*/
-0.165253547131978f16, /*p6*/
+5.175912446351073f16, /*p1*/
+0.844006986174912f16, /*p5*/
+4.584458825456749f16, /*p3*/
+0.014127821926000f16 /*p7*/
};
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
#endif /* Not ARM AC5 */

@ -47,6 +47,12 @@ target_sources(CMSISDSPFastMath PRIVATE arm_sqrt_q31.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f32.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f32.c)
if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f16.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f16.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vinverse_f16.c)
endif()
### Includes
target_include_directories(CMSISDSPFastMath PUBLIC "${DSP}/Include")

@ -0,0 +1,31 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: FastMathFunctions.c
* Description: Combination of all fast math function source files.
*
* $Date: 16. March 2020
* $Revision: V1.1.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_vexp_f16.c"
#include "arm_vlog_f16.c"
#include "arm_vinverse_f16.c"

@ -0,0 +1,84 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vexp_f16.c
* Description: Fast vectorized exp
*
* $Date: 15. October 2020
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include "arm_common_tables.h"
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_vec_math_f16.h"
#endif
/**
  @brief         Exponential of each element of a half-precision vector.
  @param[in]     pSrc       points to the input vector
  @param[out]    pDst       points to the output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none
 */
void arm_vexp_f16(
    const float16_t * pSrc,
    float16_t * pDst,
    uint32_t blockSize)
{
    uint32_t remaining;

#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Vector path: eight samples per iteration. */
    for (remaining = blockSize >> 3; remaining > 0U; remaining--)
    {
        f16x8_t vecIn = vld1q(pSrc);
        f16x8_t vecOut = vexpq_f16(vecIn);

        vst1q(pDst, vecOut);
        pSrc += 8;
        pDst += 8;
    }

    /* Samples left over after the vector loop. */
    remaining = blockSize & 7;
#else
    remaining = blockSize;
#endif

    for (; remaining > 0U; remaining--)
    {
        /* C = exp(A) */
        *pDst++ = expf(*pSrc++);
    }
}
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,81 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vinverse_f16.c
* Description: Fast vectorized inverse
*
* $Date: 15. October 2020
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include "arm_common_tables.h"
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_vec_math_f16.h"
#endif
void arm_vinverse_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt;
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
f16x8_t src;
f16x8_t dst;
blkCnt = blockSize >> 3;
while (blkCnt > 0U)
{
src = vld1q(pSrc);
dst = vrecip_hiprec_f16(src);
vst1q(pDst, dst);
pSrc += 8;
pDst += 8;
/* Decrement loop counter */
blkCnt--;
}
blkCnt = blockSize & 7;
#else
blkCnt = blockSize;
#endif
while (blkCnt > 0U)
{
*pDst++ = 1.0 / *pSrc++;
/* Decrement loop counter */
blkCnt--;
}
}
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,83 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vlog_f16.c
* Description: Fast vectorized log
*
* $Date: 15. October 2020
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include "arm_common_tables.h"
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_vec_math_f16.h"
#endif
/**
  @brief         Natural logarithm of each element of a half-precision vector.
  @param[in]     pSrc       points to the input vector
  @param[out]    pDst       points to the output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none
 */
void arm_vlog_f16(
    const float16_t * pSrc,
    float16_t * pDst,
    uint32_t blockSize)
{
    uint32_t remaining;

#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Vector path: eight samples per iteration. */
    for (remaining = blockSize >> 3; remaining > 0U; remaining--)
    {
        f16x8_t vecIn = vld1q(pSrc);
        f16x8_t vecOut = vlogq_f16(vecIn);

        vst1q(pDst, vecOut);
        pSrc += 8;
        pDst += 8;
    }

    /* Samples left over after the vector loop. */
    remaining = blockSize & 7;
#else
    remaining = blockSize;
#endif

    for (; remaining > 0U; remaining--)
    {
        /* C = log(A) */
        *pDst++ = logf(*pSrc++);
    }
}
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -33,9 +33,18 @@
*
*/
/**
@ingroup groupSVM
*/
/**
@defgroup linearsvm Linear SVM
Linear SVM classifier
*/
/**
* @addtogroup groupSVM
* @addtogroup linearsvm
* @{
*/
@ -77,5 +86,5 @@ void arm_svm_linear_init_f32(arm_svm_linear_instance_f32 *S,
/**
* @} end of groupSVM group
* @} end of linearsvm group
*/

@ -30,7 +30,7 @@
/**
* @addtogroup groupSVM
* @addtogroup linearsvm
* @{
*/
@ -449,5 +449,5 @@ void arm_svm_linear_predict_f32(
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupSVM group
* @} end of linearsvm group
*/

@ -28,12 +28,20 @@
#include <limits.h>
#include <math.h>
/**
@ingroup groupSVM
*/
/**
* @addtogroup groupSVM
* @{
@defgroup polysvm Polynomial SVM
Polynomial SVM classifier
*/
/**
* @addtogroup polysvm
* @{
*/
/**
@ -83,5 +91,5 @@ void arm_svm_polynomial_init_f32(arm_svm_polynomial_instance_f32 *S,
/**
* @} end of groupSVM group
* @} end of polysvm group
*/

@ -33,7 +33,7 @@
#endif
/**
* @addtogroup groupSVM
* @addtogroup polysvm
* @{
*/
@ -484,5 +484,5 @@ void arm_svm_polynomial_predict_f32(
/**
* @} end of groupSVM group
* @} end of polysvm group
*/

@ -28,9 +28,19 @@
#include <limits.h>
#include <math.h>
/**
@ingroup groupSVM
*/
/**
@defgroup rbfsvm RBF SVM
RBF SVM classifier
*/
/**
* @addtogroup groupSVM
* @addtogroup rbfsvm
* @{
*/
@ -75,5 +85,5 @@ void arm_svm_rbf_init_f32(arm_svm_rbf_instance_f32 *S,
/**
* @} end of groupSVM group
* @} end of rbfsvm group
*/

@ -30,7 +30,7 @@
/**
* @addtogroup groupSVM
* @addtogroup rbfsvm
* @{
*/
@ -517,5 +517,5 @@ void arm_svm_rbf_predict_f32(
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupSVM group
* @} end of rbfsvm group
*/

@ -28,9 +28,18 @@
#include <limits.h>
#include <math.h>
/**
@ingroup groupSVM
*/
/**
@defgroup sigmoidsvm Sigmoid SVM
Sigmoid SVM classifier
*/
/**
* @addtogroup groupSVM
* @addtogroup sigmoidsvm
* @{
*/
@ -77,5 +86,5 @@ void arm_svm_sigmoid_init_f32(arm_svm_sigmoid_instance_f32 *S,
/**
* @} end of groupSVM group
* @} end of sigmoidsvm group
*/

@ -29,7 +29,7 @@
#include <math.h>
/**
* @addtogroup groupSVM
* @addtogroup sigmoidsvm
* @{
*/
@ -481,5 +481,5 @@ void arm_svm_sigmoid_predict_f32(
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupSVM group
* @} end of sigmoidsvm group
*/

@ -59,4 +59,9 @@ target_sources(CMSISDSPStatistics PRIVATE arm_power_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_rms_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_std_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_var_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f16.c)
endif()

@ -33,3 +33,8 @@
#include "arm_rms_f16.c"
#include "arm_std_f16.c"
#include "arm_var_f16.c"
#include "arm_entropy_f16.c"
#include "arm_kullback_leibler_f16.c"
#include "arm_logsumexp_dot_prod_f16.c"
#include "arm_logsumexp_f16.c"
#include "arm_max_no_idx_f16.c"

@ -0,0 +1,138 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_entropy_f16.c
* Description: Entropy
*
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
@ingroup groupStats
*/
/**
@defgroup Entropy Entropy
Computes the entropy of a distribution
*/
/**
* @addtogroup Entropy
* @{
*/
/**
* @brief Entropy
*
* @param[in] pSrcA Array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return Entropy -Sum(p ln p)
*
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
/* Helium version: accumulates p * log(p) eight samples at a time, then
 * finishes the remaining samples with the scalar formula and returns the
 * negated sum. */
float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
{
    f16x8_t vecAcc = vdupq_n_f16(0.0f);
    float16_t sum;
    uint32_t cnt;

    /* Compute 8 outputs at a time */
    for (cnt = blockSize >> 3U; cnt > 0U; cnt--)
    {
        f16x8_t vecP = vld1q(pSrcA);
        f16x8_t vecPLogP = vmulq(vecP, vlogq_f16(vecP));

        vecAcc = vaddq_f16(vecAcc, vecPLogP);
        pSrcA += 8;
    }

    /* Reduce the eight partial sums to a scalar. */
    sum = vecAddAcrossF16Mve(vecAcc);

    /* Tail */
    for (cnt = blockSize & 0x7; cnt > 0; cnt--)
    {
        float16_t p = *pSrcA++;

        sum += p * logf(p);
    }

    return (-sum);
}
#else
/* Scalar version: sums p * log(p) over the input and returns the negated
 * sum. */
float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
{
    float16_t sum = 0.0f;
    uint32_t i;

    for (i = 0; i < blockSize; i++)
    {
        float16_t p = pSrcA[i];

        sum += p * logf(p);
    }

    return(-sum);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of Entropy group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -30,7 +30,7 @@
/**
* @addtogroup groupStats
* @addtogroup Entropy
* @{
*/
@ -168,5 +168,5 @@ float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize)
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupStats group
* @} end of Entropy group
*/

@ -29,7 +29,7 @@
#include <math.h>
/**
* @addtogroup groupStats
* @addtogroup Entropy
* @{
*/
@ -67,5 +67,5 @@ float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize)
}
/**
* @} end of groupStats group
* @} end of Entropy group
*/

@ -0,0 +1,150 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_kullback_leibler_f16.c
* Description: Kullback-Leibler divergence
*
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
@ingroup groupStats
*/
/**
@defgroup Kullback-Leibler Kullback-Leibler divergence
Computes the Kullback-Leibler divergence between two distributions
*/
/**
* @addtogroup Kullback-Leibler
* @{
*/
/**
* @brief Kullback-Leibler
*
* Distribution A may contain 0 with Neon version.
* Result will be right but some exception flags will be set.
*
* Distribution B must not contain 0 probability.
*
* @param[in] *pSrcA points to an array of input values for probability distribution A.
* @param[in] *pSrcB points to an array of input values for probability distribution B.
* @param[in] blockSize number of samples in the input array.
* @return Kullback-Leibler divergence D(A || B)
*
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
/* Helium version: accumulates A * log(B / A) eight samples at a time, then
 * finishes the remaining samples with the scalar formula and returns the
 * negated sum. */
float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize)
{
    f16x8_t vecAcc = vdupq_n_f16(0.0f);
    float16_t sum;
    uint32_t cnt;

    for (cnt = blockSize >> 3; cnt > 0; cnt--)
    {
        f16x8_t vecA = vld1q(pSrcA);
        f16x8_t vecB = vld1q(pSrcB);
        f16x8_t vecRatio = vdiv_f16(vecB, vecA);
        f16x8_t vecTerm = vmulq(vecA, vlogq_f16(vecRatio));

        vecAcc = vaddq_f16(vecAcc, vecTerm);
        pSrcA += 8;
        pSrcB += 8;
    }

    /* Reduce the eight partial sums to a scalar. */
    sum = vecAddAcrossF16Mve(vecAcc);

    /* Samples left over after the vector loop. */
    for (cnt = blockSize & 7; cnt > 0; cnt--)
    {
        float16_t a = *pSrcA++;
        float16_t b = *pSrcB++;

        sum += a * logf(b / a);
    }

    return(-sum);
}
#else
/* Scalar version: sums A * log(B / A) over both inputs and returns the
 * negated sum. */
float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize)
{
    float16_t sum = 0.0f;
    uint32_t i;

    for (i = 0; i < blockSize; i++)
    {
        float16_t a = pSrcA[i];
        float16_t b = pSrcB[i];

        sum += a * logf(b / a);
    }

    return(-sum);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of Kullback-Leibler group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -30,7 +30,7 @@
/**
* @addtogroup groupStats
* @addtogroup Kullback-Leibler
* @{
*/
@ -187,5 +187,5 @@ float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSr
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupStats group
* @} end of Kullback-Leibler group
*/

@ -29,7 +29,7 @@
#include <math.h>
/**
* @addtogroup groupStats
* @addtogroup Kullback-Leibler
* @{
*/
@ -69,5 +69,5 @@ float64_t arm_kullback_leibler_f64(const float64_t * pSrcA, const float64_t * pS
}
/**
* @} end of groupStats group
* @} end of Kullback-Leibler group
*/

@ -0,0 +1,82 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_dot_prod_f16.c
* Description: Dot product with log arithmetic (LogSumExp)
*
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
@ingroup groupStats
*/
/**
@defgroup LogSumExp LogSumExp
LogSumExp optimizations to compute sum of probabilities with Gaussian distributions
*/
/**
* @addtogroup LogSumExp
* @{
*/
/**
* @brief Dot product with log arithmetic
*
* Vectors are containing the log of the samples
*
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[in] blockSize number of samples in each vector
* @param[in] *pTmpBuffer temporary buffer of length blockSize
* @return The log of the dot product.
*
*/
/* The element-wise sum of the two log-domain vectors is written to
 * pTmpBuffer, and the LogSumExp of that buffer is returned. */
float16_t arm_logsumexp_dot_prod_f16(const float16_t * pSrcA,
    const float16_t * pSrcB,
    uint32_t blockSize,
    float16_t *pTmpBuffer)
{
    /* Adding the logs corresponds to multiplying the original samples. */
    arm_add_f16((float16_t*)pSrcA, (float16_t*)pSrcB, pTmpBuffer, blockSize);

    return arm_logsumexp_f16(pTmpBuffer, blockSize);
}
/**
* @} end of LogSumExp group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -30,7 +30,7 @@
/**
* @addtogroup groupStats
* @addtogroup LogSumExp
* @{
*/
@ -62,5 +62,5 @@ float32_t arm_logsumexp_dot_prod_f32(const float32_t * pSrcA,
}
/**
* @} end of groupStats group
* @} end of LogSumExp group
*/

@ -0,0 +1,170 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f16.c
* Description: LogSumExp
*
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
* @addtogroup LogSumExp
* @{
*/
/**
* @brief Computation of the LogSumExp
*
* In probabilistic computations, the dynamic of the probability values can be very
* wide because they come from gaussian functions.
* To avoid underflow and overflow issues, the values are represented by their log.
* In this representation, multiplying the original exp values is easy : their logs are added.
* But adding the original exp values is requiring some special handling and it is the
* goal of the LogSumExp function.
*
* If the values are x1...xn, the function is computing:
*
* ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that
* rounding issues are minimised.
*
* The max xm of the values is extracted and the function is computing:
* xm + ln(exp(x1 - xm) + ... + exp(xn - xm))
*
* @param[in] *in Pointer to an array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return LogSumExp
*
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
/*
 * Helium (MVE) implementation of LogSumExp for half-precision input.
 *
 * Computes maxVal + ln(sum_i exp(in[i] - maxVal)), where maxVal is the
 * largest input value as returned by arm_max_no_idx_f16.
 *
 * in         points to the input values (read only)
 * blockSize  number of values in the input array
 * returns    the LogSumExp of the input values
 */
float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
{
    float16_t maxVal;
    const float16_t *pIn = in;
    int32_t blkCnt;
    float16_t accum;
    float16_t tmp;
    f16x8_t vSum = vdupq_n_f16(0.0f16);

    /* The input is only read, so it is passed on without dropping const. */
    arm_max_no_idx_f16(in, blockSize, &maxVal);

    /* Vector part: eight half-precision lanes per iteration. */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0)
    {
        f16x8_t vecIn = vld1q(pIn);
        f16x8_t vecExp = vexpq_f16(vsubq_n_f16(vecIn, maxVal));

        vSum = vaddq_f16(vSum, vecExp);

        /* Advance the source pointer and decrement the loop counter. */
        pIn += 8;
        blkCnt--;
    }

    /* Reduce the per-lane partial sums to a single scalar. */
    accum = vecAddAcrossF16Mve(vSum);

    /* Scalar tail: the blockSize % 8 values the vector loop did not cover. */
    blkCnt = blockSize & 0x7;
    while (blkCnt > 0)
    {
        tmp = *pIn++;
        accum += expf(tmp - maxVal);
        blkCnt--;
    }

    /* Add the maximum back after taking the log of the shifted sum. */
    accum = maxVal + logf(accum);

    return (accum);
}
#else
float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
{
float16_t maxVal;
float16_t tmp;
const float16_t *pIn;
uint32_t blkCnt;
float16_t accum;
pIn = in;
blkCnt = blockSize;
maxVal = *pIn++;
blkCnt--;
while(blkCnt > 0)
{
tmp = *pIn++;
if (tmp > maxVal)
{
maxVal = tmp;
}
blkCnt--;
}
blkCnt = blockSize;
pIn = in;
accum = 0;
while(blkCnt > 0)
{
tmp = *pIn++;
accum += expf(tmp - maxVal);
blkCnt--;
}
accum = maxVal + logf(accum);
return(accum);
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of LogSumExp group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -30,7 +30,7 @@
/**
* @addtogroup groupStats
* @addtogroup LogSumExp
* @{
*/
@ -271,5 +271,5 @@ float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupStats group
* @} end of LogSumExp group
*/

@ -0,0 +1,144 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_f16.c
* Description: Maximum value of a floating-point vector without returning the index
*
* $Date: 16. October 2020
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_no_idx_f16(
    const float16_t *pSrc,
    uint32_t blockSize,
    float16_t *pResult)
{
    /* Per-lane running maxima and the scalar result, both seeded with F16_MIN. */
    f16x8_t laneMax = vdupq_n_f16(F16_MIN);
    float16_t best = F16_MIN;
    uint32_t remaining;

    /* Vector part: eight half-precision values per iteration. */
    for (remaining = blockSize >> 3U; remaining > 0U; remaining--)
    {
        f16x8_t chunk = vldrhq_f16(pSrc);

        laneMax = vmaxnmq(chunk, laneMax);
        pSrc += 8;
    }

    /* Fold the eight lanes (and the scalar seed) into one maximum. */
    best = vmaxnmvq(best, laneMax);

    /* Scalar tail: the blockSize % 8 values the vector loop did not cover. */
    for (remaining = blockSize & 7; remaining > 0U; remaining--)
    {
        float16_t candidate = *pSrc++;

        if (best < candidate)
        {
            best = candidate;
        }
    }

    *pResult = best;
}
#else
void arm_max_no_idx_f16(
    const float16_t *pSrc,
    uint32_t blockSize,
    float16_t *pResult)
{
    /* Running maximum, seeded with F16_MIN; this is also the result for an empty input. */
    float16_t best = F16_MIN;
    uint32_t remaining;

    for (remaining = blockSize; remaining > 0U; remaining--)
    {
        float16_t candidate = *pSrc++;

        /* Keep the larger of the running maximum and the new sample. */
        if (best < candidate)
        {
            best = candidate;
        }
    }

    *pResult = best;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -74,14 +74,12 @@ void arm_var_f16(
arm_mean_f16(pSrc, blockSize, &fMean);
/* 6.14 bug */
#if defined(SDCOMP_xxx)
#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100)
#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
__asm volatile(
" vmov.i32 %[acc], #0 \n"
: [acc] "+t"(sumVec)
:
: );
#endif
#endif
blkCnt = blockSize;

@ -31,11 +31,21 @@
#include <limits.h>
#include <math.h>
/**
@ingroup groupSupport
*/
/**
@defgroup barycenter Barycenter
Barycenter of weighted vectors
*/
/**
@addtogroup barycenter
@{
*/
/**
* @brief Barycenter
@ -255,7 +265,7 @@ void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupSupport group
* @} end of barycenter group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -30,7 +30,7 @@
/**
@ingroup groupSupport
@ingroup barycenter
*/
@ -408,5 +408,5 @@ void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupSupport group
* @} end of barycenter group
*/

@ -32,9 +32,19 @@
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupSupport
*/
/**
@defgroup weightedsum Weighted Sum
Weighted sum of values
*/
/**
* @addtogroup groupSupport
* @addtogroup weightedsum
* @{
*/
@ -128,7 +138,7 @@ float16_t arm_weighted_sum_f16(const float16_t *in, const float16_t *weigths, ui
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupSupport group
* @} end of weightedsum group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -31,7 +31,7 @@
#include "dsp/support_functions.h"
/**
* @addtogroup groupSupport
* @addtogroup weightedsum
* @{
*/
@ -182,5 +182,5 @@ float32_t arm_weighted_sum_f32(const float32_t *in, const float32_t *weigths, ui
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of groupSupport group
* @} end of weightedsum group
*/

@ -339,6 +339,7 @@ set(TESTSRC16
Source/Tests/TransformRF16.cpp
Source/Tests/SupportTestsF16.cpp
Source/Tests/SupportBarTestsF16.cpp
Source/Tests/FastMathF16.cpp
)
endif()
endif()

@ -0,0 +1,23 @@
#include "Test.h"
#include "Pattern.h"
#include "dsp/fast_math_functions_f16.h"
/*
 * Test suite class named for the f16 fast-math functions; it derives from
 * Client::Suite and declares the set-up / tear-down hooks plus the pattern
 * buffers used by the tests.
 */
class FastMathF16:public Client::Suite
{
public:
/* Builds the suite for the given test identifier. */
FastMathF16(Testing::testID_t id);
/* Hook taking the test id, its parameter list and the pattern manager. */
virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr);
/* Hook taking the test id and the pattern manager. */
virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
private:
/* NOTE(review): presumably the declarations of the individual test methods - confirm against FastMathF16_decl.h. */
#include "FastMathF16_decl.h"
/* Input and output pattern buffers, both holding float16_t elements. */
Client::Pattern<float16_t> input;
Client::LocalPattern<float16_t> output;
// Reference patterns are not loaded when we are in dump mode
Client::RefPattern<float16_t> ref;
};

@ -28,7 +28,7 @@ def writeTests(config,format):
vals[0] = -0.4
sqrtvals[0] = 0.0
if format != 0:
if format != 0 and format != 16:
angles=np.concatenate((a1,a2,a1))
angles = angles / (2*math.pi)
config.writeInput(1, angles,"Angles")
@ -43,7 +43,7 @@ def writeTests(config,format):
config.writeInput(1, samples,"Samples")
def writeTestsF32(config,format):
def writeTestsFloat(config,format):
writeTests(config,format)
data1 = np.random.randn(20)
@ -61,22 +61,30 @@ def writeTestsF32(config,format):
v = np.exp(samples)
config.writeReference(1, v,"Exp")
# For benchmarks
# For benchmarks and other tests
samples=np.random.randn(NBSAMPLES)
samples = np.abs(Tools.normalize(samples))
config.writeInput(1, samples,"Samples")
v = 1.0 / samples
config.writeReference(1, v,"Inverse")
def generatePatterns():
PATTERNDIR = os.path.join("Patterns","DSP","FastMath","FastMath")
PARAMDIR = os.path.join("Parameters","DSP","FastMath","FastMath")
configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32")
configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16")
configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31")
configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15")
writeTestsF32(configf32,0)
writeTestsFloat(configf32,0)
writeTestsFloat(configf16,16)
writeTests(configq31,31)
writeTests(configq15,15)

@ -116,6 +116,13 @@ def logSumExpDotTest(config,nb):
config.writeInputS16(nb, dims,"Dims")
config.writeReference(nb, outputs,"RefLogSumExpDot")
def writeF16OnlyTests(config,nb):
    """Write the four f16-only pattern sets and return the next free pattern number.

    The generators are called in a fixed order with consecutive pattern
    numbers starting at ``nb``: entropy, logsumexp, KL and logsumexp-dot.
    """
    generators = (entropyTest, logsumexpTest, klTest, logSumExpDotTest)
    for offset, generate in enumerate(generators):
        generate(config, nb + offset)
    # One pattern number is consumed per generator.
    return nb + len(generators)
def writeF32OnlyTests(config,nb):
entropyTest(config,nb)
logsumexpTest(config,nb+1)
@ -357,6 +364,7 @@ def generatePatterns():
writeTests(configq7,1,7)
nb=writeTests(configf16,1,16)
nb=writeF16OnlyTests(configf16,22)
if __name__ == '__main__':
generatePatterns()

@ -0,0 +1,48 @@
H
23
// 0.000000
0x0
// 0.785398
0x3a48
// 1.570796
0x3e48
// 2.356194
0x40b6
// 3.141593
0x4248
// 3.926991
0x43db
// 4.712389
0x44b6
// 6.283184
0x4648
// -0.785398
0xba48
// -1.570796
0xbe48
// -2.356194
0xc0b6
// -3.141593
0xc248
// -3.926991
0xc3db
// -4.712389
0xc4b6
// -6.283186
0xc648
// 6.283185
0x4648
// 7.068583
0x4712
// 7.853982
0x47db
// 8.639380
0x4852
// 9.424778
0x48b6
// 10.210176
0x491b
// 10.995574
0x497f
// 12.566370
0x4a48

@ -0,0 +1,48 @@
H
23
// 1.000000
0x3c00
// 0.707107
0x39a8
// 0.000000
0x0
// -0.707107
0xb9a8
// -1.000000
0xbc00
// -0.707107
0xb9a8
// -0.000000
0x8000
// 1.000000
0x3c00
// 0.707107
0x39a8
// 0.000000
0x0
// -0.707107
0xb9a8
// -1.000000
0xbc00
// -0.707107
0xb9a8
// -0.000000
0x8000
// 1.000000
0x3c00
// 1.000000
0x3c00
// 0.707107
0x39a8
// 0.000000
0x0
// -0.707107
0xb9a8
// -1.000000
0xbc00
// -0.707107
0xb9a8
// -0.000000
0x8000
// 1.000000
0x3c00

@ -0,0 +1,106 @@
H
52
// 1.000000
0x3c00
// 2.718282
0x4170
// 0.670320
0x395d
// 0.681354
0x3973
// 0.692569
0x398a
// 0.703969
0x39a2
// 0.715557
0x39b9
// 0.727336
0x39d2
// 0.739308
0x39ea
// 0.751477
0x3a03
// 0.763847
0x3a1c
// 0.776420
0x3a36
// 0.789201
0x3a50
// 0.802191
0x3a6b
// 0.815396
0x3a86
// 0.828818
0x3aa1
// 0.842460
0x3abd
// 0.856328
0x3ada
// 0.870423
0x3af7
// 0.884751
0x3b14
// 0.899315
0x3b32
// 0.914118
0x3b50
// 0.929165
0x3b6f
// 0.944459
0x3b8e
// 0.960005
0x3bae
// 0.975808
0x3bce
// 0.991870
0x3bef
// 1.008197
0x3c08
// 1.024792
0x3c19
// 1.041661
0x3c2b
// 1.058807
0x3c3c
// 1.076236
0x3c4e
// 1.093951
0x3c60
// 1.111958
0x3c73
// 1.130261
0x3c85
// 1.148866
0x3c98
// 1.167777
0x3cac
// 1.186999
0x3cbf
// 1.206538
0x3cd3
// 1.226398
0x3ce8
// 1.246585
0x3cfd
// 1.267105
0x3d12
// 1.287962
0x3d27
// 1.309163
0x3d3d
// 1.330712
0x3d53
// 1.352616
0x3d69
// 1.374881
0x3d80
// 1.397513
0x3d97
// 1.420516
0x3daf
// 1.443899
0x3dc7
// 1.467666
0x3ddf
// 1.491825
0x3df8

@ -0,0 +1,106 @@
H
52
// 0.000000
0x0
// 1.000000
0x3c00
// -0.400000
0xb666
// -0.383673
0xb624
// -0.367347
0xb5e1
// -0.351020
0xb59e
// -0.334694
0xb55b
// -0.318367
0xb518
// -0.302041
0xb4d5
// -0.285714
0xb492
// -0.269388
0xb44f
// -0.253061
0xb40d
// -0.236735
0xb393
// -0.220408
0xb30e
// -0.204082
0xb288
// -0.187755
0xb202
// -0.171429
0xb17c
// -0.155102
0xb0f7
// -0.138776
0xb071
// -0.122449
0xafd6
// -0.106122
0xaecb
// -0.089796
0xadbf
// -0.073469
0xacb4
// -0.057143
0xab50
// -0.040816
0xa939
// -0.024490
0xa645
// -0.008163
0xa02e
// 0.008163
0x202e
// 0.024490
0x2645
// 0.040816
0x2939
// 0.057143
0x2b50
// 0.073469
0x2cb4
// 0.089796
0x2dbf
// 0.106122
0x2ecb
// 0.122449
0x2fd6
// 0.138776
0x3071
// 0.155102
0x30f7
// 0.171429
0x317c
// 0.187755
0x3202
// 0.204082
0x3288
// 0.220408
0x330e
// 0.236735
0x3393
// 0.253061
0x340d
// 0.269388
0x344f
// 0.285714
0x3492
// 0.302041
0x34d5
// 0.318367
0x3518
// 0.334694
0x355b
// 0.351020
0x359e
// 0.367347
0x35e1
// 0.383673
0x3624
// 0.400000
0x3666

@ -0,0 +1,514 @@
H
256
// 13.282788
0x4aa4
// 74.795943
0x54ad
// 4.534229
0x4489
// 3.745299
0x437e
// 2.439216
0x40e1
// 6.181096
0x462e
// 4.004707
0x4401
// 11.283654
0x49a4
// 1.847747
0x3f64
// 5.549382
0x458d
// 1.536148
0x3e25
// 30.005060
0x4f80
// 31.475099
0x4fde
// 17.543195
0x4c63
// 2.419261
0x40d7
// 4.349753
0x445a
// 4.841152
0x44d7
// 8.101053
0x480d
// 7.775466
0x47c7
// 10.960573
0x497b
// 1.935619
0x3fbe
// 14.846755
0x4b6c
// 2.538388
0x4114
// 2.328174
0x40a8
// 1.187660
0x3cc0
// 9.459035
0x48bb
// 6.531679
0x4688
// 28.111782
0x4f07
// 2.775787
0x418d
// 1.610728
0x3e71
// 4.295367
0x444c
// 8.503271
0x4840
// 6.431921
0x466f
// 1.644415
0x3e94
// 5.022253
0x4506
// 46.675156
0x51d6
// 7.923678
0x47ec
// 1.986785
0x3ff2
// 12.264212
0x4a22
// 5.127496
0x4521
// 3.526537
0x430e
// 3.099069
0x4233
// 11.281743
0x49a4
// 5.690813
0x45b1
// 25.784472
0x4e72
// 31.172867
0x4fcb
// 5.406237
0x4568
// 1.003867
0x3c04
// 2.474874
0x40f3
// 5.259653
0x4542
// 6.833799
0x46d5
// 2.146320
0x404b
// 4.075154
0x4413
// 6.755841
0x46c1
// 3.298992
0x4299
// 3.211582
0x426c
// 154.452815
0x58d4
// 2.467569
0x40ef
// 13.353500
0x4aad
// 2.897118
0x41cb
// 11.198146
0x4999
// 2.095061
0x4031
// 5.949994
0x45f3
// 502.264324
0x5fd9
// 9.785310
0x48e5
// 24.679848
0x4e2c
// 9.141598
0x4892
// 4.842086
0x44d8
// 2.305929
0x409d
// 2.553810
0x411c
// 6.529844
0x4688
// 12.616308
0x4a4f
// 10.160835
0x4915
// 4.741947
0x44be
// 3.062033
0x4220
// 15.072163
0x4b89
// 19.437242
0x4cdc
// 304.465872
0x5cc2
// 3.697883
0x4365
// 1.200278
0x3ccd
// 12.039526
0x4a05
// 6.285477
0x4649
// 41.940922
0x513e
// 14.367260
0x4b2f
// 2.844286
0x41b0
// 2.434156
0x40de
// 18.803330
0x4cb3
// 3.590405
0x432e
// 45.916673
0x51bd
// 16.744938
0x4c30
// 9.513594
0x48c2
// 266.925697
0x5c2c
// 141.996772
0x5870
// 1.273840
0x3d18
// 4.064921
0x4411
// 3.059166
0x421e
// 3.063974
0x4221
// 12.270595
0x4a23
// 17.040917
0x4c43
// 1.259633
0x3d0a
// 2.821234
0x41a4
// 6.853772
0x46db
// 7.454620
0x4774
// 1.609663
0x3e70
// 11.592823
0x49cc
// 8.194720
0x4819
// 2.951149
0x41e7
// 2.312031
0x40a0
// 9.662832
0x48d5
// 1.513853
0x3e0e
// 93.930231
0x55df
// 91.754898
0x55bc
// 2.936342
0x41df
// 36.983413
0x509f
// 1.683027
0x3ebb
// 3.835847
0x43ac
// 4.018890
0x4405
// 8.686859
0x4858
// 3.832640
0x43aa
// 5.651256
0x45a7
// 167.057056
0x5938
// 10.621388
0x4950
// 2.039201
0x4014
// 3.169404
0x4257
// 121.699150
0x579b
// 1.962375
0x3fd9
// 3.588653
0x432d
// 6.551802
0x468d
// 2.273146
0x408c
// 6.206085
0x4635
// 1.260120
0x3d0a
// 1.328843
0x3d51
// 7.193014
0x4731
// 5.073456
0x4513
// 4.099381
0x4419
// 14.640782
0x4b52
// 8.470057
0x483c
// 35.841993
0x507b
// 25.902541
0x4e7a
// 8.278306
0x4824
// 6.579874
0x4694
// 3.183432
0x425e
// 3.157985
0x4251
// 30.987647
0x4fbf
// 4.960553
0x44f6
// 28.319462
0x4f14
// 3.360753
0x42b9
// 13.357196
0x4aae
// 7.617099
0x479e
// 12.543129
0x4a46
// 3.206430
0x426a
// 24.728589
0x4e2f
// 2.883364
0x41c4
// 4.739018
0x44bd
// 11.599710
0x49cd
// 9.538527
0x48c5
// 16.661500
0x4c2a
// 2.736285
0x4179
// 6.631133
0x46a2
// 11.402722
0x49b4
// 1.858302
0x3f6f
// 11.107479
0x498e
// 2.144436
0x404a
// 3.471115
0x42f1
// 7.195419
0x4732
// 2.250048
0x4080
// 5.355021
0x455b
// 2.175851
0x405a
// 6.975780
0x46fa
// 10.830583
0x496a
// 4.039975
0x440a
// 19.297319
0x4cd3
// 14.568924
0x4b49
// 1.760573
0x3f0b
// 4.344142
0x4458
// 1.911989
0x3fa6
// 125.811431
0x57dd
// 6.147110
0x4626
// 3.323063
0x42a5
// 141.465322
0x586c
// 2.428645
0x40db
// 5.057885
0x450f
// 17.465511
0x4c5e
// 5.487886
0x457d
// 3.693320
0x4363
// 5.722713
0x45b9
// 2.815819
0x41a2
// 17.741187
0x4c6f
// 156.719386
0x58e6
// 9.622230
0x48d0
// 3.582729
0x432a
// 7.122762
0x471f
// 3.801118
0x439a
// 21.059681
0x4d44
// 7.002552
0x4701
// 1.681254
0x3eba
// 26.237564
0x4e8f
// 2.196333
0x4065
// 3.057311
0x421d
// 43.974325
0x517f
// 4.871455
0x44df
// 58.682025
0x5356
// 1.935719
0x3fbe
// 50.178529
0x5246
// 14.081570
0x4b0a
// 2.276558
0x408e
// 15.295183
0x4ba6
// 3.718388
0x4370
// 3.839651
0x43ae
// 3.527233
0x430e
// 2.428714
0x40dc
// 11.062054
0x4988
// 5.778279
0x45c7
// 2.901877
0x41ce
// 4.614082
0x449d
// 2.846533
0x41b1
// 8.471893
0x483c
// 5.528337
0x4587
// 4.069745
0x4412
// 4.311615
0x4450
// 2.410911
0x40d2
// 4.794024
0x44cb
// 1.952501
0x3fcf
// 2.350841
0x40b4
// 2.240902
0x407b
// 5.954644
0x45f4
// 6.121318
0x461f
// 6.649823
0x46a6
// 16.968851
0x4c3e
// 1.000000
0x3c00
// 5.024657
0x4506
// 5.569085
0x4592
// 8.212669
0x481b
// 2.487941
0x40fa
// 1.966557
0x3fde
// 3.370575
0x42be
// 29.614104
0x4f67
// 3.196824
0x4265
// 1.470828
0x3de2
// 8.924856
0x4876
// 3.005112
0x4203
// 3.024847
0x420d
// 4.340886
0x4457
// 1.894158
0x3f94
// 3.562064
0x4320
// 3.233189
0x4277
// 1.628957
0x3e84
// 2.791436
0x4195
// 8.235823
0x481e
// 8.808764
0x4868
// 36.005208
0x5080
// 12.766707
0x4a62
// 5.964918
0x45f7
// 1.671910
0x3eb0
// 2.699011
0x4166
// 16.411460
0x4c1a

@ -0,0 +1,52 @@
H
25
// -2.302585
0xc09b
// -1.203973
0xbcd1
// -0.693147
0xb98c
// 0.000000
0x0
// 0.693147
0x398c
// -2.516839
0xc109
// 0.000000
0x0
// -5.908962
0xc5e9
// -1.345933
0xbd62
// -0.923815
0xbb64
// -4.046497
0xc40c
// -2.530234
0xc10f
// -0.724334
0xb9cb
// -1.436949
0xbdbf
// -1.327187
0xbd4f
// -1.741553
0xbef7
// -0.066722
0xac45
// -0.616041
0xb8ee
// -0.822195
0xba94
// -1.579204
0xbe51
// -1.333689
0xbd56
// -0.860545
0xbae2
// -1.080309
0xbc52
// -1.977120
0xbfe9
// -1.877663
0xbf83

@ -0,0 +1,52 @@
H
25
// 0.100000
0x2e66
// 0.300000
0x34cd
// 0.500000
0x3800
// 1.000000
0x3c00
// 2.000000
0x4000
// 0.080714
0x2d2a
// 1.000000
0x3c00
// 0.002715
0x198f
// 0.260297
0x342a
// 0.397001
0x365a
// 0.017484
0x247a
// 0.079640
0x2d19
// 0.484647
0x37c1
// 0.237652
0x339b
// 0.265222
0x343e
// 0.175248
0x319c
// 0.935456
0x3b7c
// 0.540078
0x3852
// 0.439466
0x3708
// 0.206139
0x3299
// 0.263503
0x3437
// 0.422932
0x36c4
// 0.339491
0x356f
// 0.138467
0x306e
// 0.152947
0x30e5

@ -0,0 +1,514 @@
H
256
// 0.075285
0x2cd1
// 0.013370
0x22d8
// 0.220545
0x330f
// 0.267001
0x3446
// 0.409968
0x368f
// 0.161784
0x312d
// 0.249706
0x33fe
// 0.088624
0x2dac
// 0.541200
0x3854
// 0.180200
0x31c4
// 0.650979
0x3935
// 0.033328
0x2844
// 0.031771
0x2811
// 0.057002
0x2b4c
// 0.413349
0x369d
// 0.229898
0x335b
// 0.206562
0x329c
// 0.123441
0x2fe6
// 0.128610
0x301e
// 0.091236
0x2dd7
// 0.516631
0x3822
// 0.067355
0x2c50
// 0.393951
0x364e
// 0.429521
0x36df
// 0.841992
0x3abc
// 0.105719
0x2ec4
// 0.153100
0x30e6
// 0.035572
0x288e
// 0.360258
0x35c4
// 0.620837
0x38f7
// 0.232809
0x3373
// 0.117602
0x2f87
// 0.155475
0x30fa
// 0.608119
0x38dd
// 0.199114
0x325f
// 0.021425
0x257c
// 0.126204
0x300a
// 0.503326
0x3807
// 0.081538
0x2d38
// 0.195027
0x323e
// 0.283564
0x3489
// 0.322678
0x352a
// 0.088639
0x2dac
// 0.175722
0x31a0
// 0.038783
0x28f7
// 0.032079
0x281b
// 0.184972
0x31eb
// 0.996148
0x3bf8
// 0.404061
0x3677
// 0.190127
0x3216
// 0.146331
0x30af
// 0.465914
0x3774
// 0.245390
0x33da
// 0.148020
0x30bd
// 0.303123
0x34da
// 0.311373
0x34fb
// 0.006474
0x1ea1
// 0.405257
0x367c
// 0.074887
0x2ccb
// 0.345171
0x3586
// 0.089300
0x2db7
// 0.477313
0x37a3
// 0.168067
0x3161
// 0.001991
0x1814
// 0.102194
0x2e8a
// 0.040519
0x2930
// 0.109390
0x2f00
// 0.206523
0x329c
// 0.433665
0x36f0
// 0.391572
0x3644
// 0.153143
0x30e7
// 0.079262
0x2d13
// 0.098417
0x2e4c
// 0.210884
0x32c0
// 0.326580
0x353a
// 0.066347
0x2c3f
// 0.051448
0x2a96
// 0.003284
0x1aba
// 0.270425
0x3454
// 0.833140
0x3aaa
// 0.083060
0x2d51
// 0.159097
0x3117
// 0.023843
0x261b
// 0.069603
0x2c74
// 0.351582
0x35a0
// 0.410820
0x3693
// 0.053182
0x2acf
// 0.278520
0x3475
// 0.021779
0x2593
// 0.059720
0x2ba5
// 0.105113
0x2eba
// 0.003746
0x1bac
// 0.007042
0x1f36
// 0.785028
0x3a48
// 0.246007
0x33df
// 0.326886
0x353b
// 0.326374
0x3539
// 0.081496
0x2d37
// 0.058682
0x2b83
// 0.793882
0x3a5a
// 0.354455
0x35ac
// 0.145905
0x30ab
// 0.134145
0x304b
// 0.621248
0x38f8
// 0.086260
0x2d85
// 0.122030
0x2fcf
// 0.338851
0x356c
// 0.432520
0x36ec
// 0.103489
0x2ea0
// 0.660566
0x3949
// 0.010646
0x2173
// 0.010899
0x2195
// 0.340560
0x3573
// 0.027039
0x26ec
// 0.594168
0x38c1
// 0.260699
0x342c
// 0.248825
0x33f6
// 0.115116
0x2f5e
// 0.260917
0x342d
// 0.176952
0x31aa
// 0.005986
0x1e21
// 0.094150
0x2e07
// 0.490388
0x37d9
// 0.315517
0x350c
// 0.008217
0x2035
// 0.509586
0x3814
// 0.278656
0x3475
// 0.152630
0x30e2
// 0.439919
0x370a
// 0.161132
0x3128
// 0.793575
0x3a59
// 0.752534
0x3a05
// 0.139024
0x3073
// 0.197104
0x324f
// 0.243939
0x33ce
// 0.068302
0x2c5f
// 0.118063
0x2f8e
// 0.027900
0x2724
// 0.038606
0x28f1
// 0.120798
0x2fbb
// 0.151979
0x30dd
// 0.314126
0x3507
// 0.316658
0x3511
// 0.032271
0x2821
// 0.201590
0x3273
// 0.035311
0x2885
// 0.297552
0x34c3
// 0.074866
0x2ccb
// 0.131284
0x3033
// 0.079725
0x2d1a
// 0.311873
0x34fd
// 0.040439
0x292d
// 0.346817
0x358d
// 0.211014
0x32c1
// 0.086209
0x2d84
// 0.104838
0x2eb6
// 0.060019
0x2baf
// 0.365459
0x35d9
// 0.150804
0x30d3
// 0.087698
0x2d9d
// 0.538126
0x384e
// 0.090029
0x2dc3
// 0.466323
0x3776
// 0.288092
0x349c
// 0.138977
0x3073
// 0.444435
0x371c
// 0.186741
0x31fa
// 0.459590
0x375a
// 0.143353
0x3096
// 0.092331
0x2de9
// 0.247526
0x33ec
// 0.051821
0x2aa2
// 0.068639
0x2c65
// 0.567997
0x388b
// 0.230195
0x335e
// 0.523016
0x382f
// 0.007948
0x2012
// 0.162678
0x3135
// 0.300927
0x34d1
// 0.007069
0x1f3d
// 0.411752
0x3697
// 0.197711
0x3254
// 0.057256
0x2b54
// 0.182220
0x31d5
// 0.270759
0x3455
// 0.174742
0x3197
// 0.355136
0x35af
// 0.056366
0x2b37
// 0.006381
0x1e89
// 0.103926
0x2ea7
// 0.279117
0x3477
// 0.140395
0x307e
// 0.263080
0x3436
// 0.047484
0x2a14
// 0.142805
0x3092
// 0.594794
0x38c2
// 0.038113
0x28e1
// 0.455304
0x3749
// 0.327085
0x353c
// 0.022741
0x25d2
// 0.205277
0x3292
// 0.017041
0x245d
// 0.516604
0x3822
// 0.019929
0x251a
// 0.071015
0x2c8c
// 0.439260
0x3707
// 0.065380
0x2c2f
// 0.268934
0x344e
// 0.260440
0x342b
// 0.283508
0x3489
// 0.411741
0x3696
// 0.090399
0x2dc9
// 0.173062
0x318a
// 0.344605
0x3584
// 0.216728
0x32ef
// 0.351305
0x359f
// 0.118037
0x2f8e
// 0.180886
0x31ca
// 0.245716
0x33dd
// 0.231932
0x336c
// 0.414781
0x36a3
// 0.208593
0x32ad
// 0.512164
0x3819
// 0.425380
0x36ce
// 0.446249
0x3724
// 0.167936
0x3160
// 0.163364
0x313a
// 0.150380
0x30d0
// 0.058932
0x2b8b
// 1.000000
0x3c00
// 0.199019
0x325e
// 0.179563
0x31bf
// 0.121763
0x2fcb
// 0.401939
0x366e
// 0.508503
0x3811
// 0.296685
0x34bf
// 0.033768
0x2852
// 0.312811
0x3501
// 0.679889
0x3970
// 0.112047
0x2f2c
// 0.332766
0x3553
// 0.330595
0x354a
// 0.230368
0x335f
// 0.527939
0x3839
// 0.280736
0x347e
// 0.309292
0x34f3
// 0.613890
0x38e9
// 0.358239
0x35bb
// 0.121421
0x2fc5
// 0.113523
0x2f44
// 0.027774
0x271c
// 0.078329
0x2d03
// 0.167647
0x315d
// 0.598118
0x38c9
// 0.370506
0x35ee
// 0.060933
0x2bcd

@ -0,0 +1,48 @@
H
23
// 0.000000
0x0
// 0.707107
0x39a8
// 1.000000
0x3c00
// 0.707107
0x39a8
// 0.000000
0x0
// -0.707107
0xb9a8
// -1.000000
0xbc00
// -0.000001
0x8011
// -0.707107
0xb9a8
// -1.000000
0xbc00
// -0.707107
0xb9a8
// -0.000000
0x8000
// 0.707107
0x39a8
// 1.000000
0x3c00
// -0.000001
0x8011
// -0.000000
0x8000
// 0.707107
0x39a8
// 1.000000
0x3c00
// 0.707107
0x39a8
// 0.000000
0x0
// -0.707107
0xb9a8
// -1.000000
0xbc00
// -0.000001
0x8011

@ -0,0 +1,18 @@
H
8
// 0.000000
0x0
// 0.000000
0x0
// 0.316228
0x350f
// 1.000000
0x3c00
// 1.414214
0x3da8
// 1.732051
0x3eee
// 1.870829
0x3f7c
// 1.897367
0x3f97

@ -0,0 +1,18 @@
H
8
// -0.400000
0xb666
// 0.000000
0x0
// 0.100000
0x2e66
// 1.000000
0x3c00
// 2.000000
0x4000
// 3.000000
0x4200
// 3.500000
0x4300
// 3.600000
0x4333

@ -0,0 +1,24 @@
H
11
// 10
0x000A
// 3
0x0003
// 8
0x0008
// 9
0x0009
// 12
0x000C
// 3
0x0003
// 8
0x0008
// 9
0x0009
// 12
0x000C
// 3
0x0003
// 8
0x0008

@ -0,0 +1,24 @@
H
11
// 10
0x000A
// 3
0x0003
// 8
0x0008
// 9
0x0009
// 12
0x000C
// 3
0x0003
// 8
0x0008
// 9
0x0009
// 12
0x000C
// 3
0x0003
// 8
0x0008

@ -0,0 +1,24 @@
H
11
// 10
0x000A
// 3
0x0003
// 8
0x0008
// 9
0x0009
// 12
0x000C
// 3
0x0003
// 8
0x0008
// 9
0x0009
// 12
0x000C
// 3
0x0003
// 8
0x0008

@ -0,0 +1,24 @@
H
11
// 10
0x000A
// 3
0x0003
// 8
0x0008
// 9
0x0009
// 12
0x000C
// 3
0x0003
// 8
0x0008
// 9
0x0009
// 12
0x000C
// 3
0x0003
// 8
0x0008

@ -0,0 +1,152 @@
H
75
// 0.322836
0x352a
// 0.198525
0x325a
// 0.478639
0x37a9
// 0.026143
0x26b1
// 0.098991
0x2e56
// 0.143166
0x3095
// 0.085541
0x2d7a
// 0.068541
0x2c63
// 0.211153
0x32c2
// 0.181133
0x31cc
// 0.185330
0x31ee
// 0.089042
0x2db3
// 0.147745
0x30ba
// 0.095482
0x2e1c
// 0.040729
0x2937
// 0.148711
0x30c2
// 0.132218
0x303b
// 0.141558
0x3088
// 0.075285
0x2cd1
// 0.129230
0x3023
// 0.111640
0x2f25
// 0.120751
0x2fba
// 0.013239
0x22c7
// 0.060065
0x2bb0
// 0.127400
0x3014
// 0.094640
0x2e0f
// 0.106302
0x2ece
// 0.047992
0x2a25
// 0.051468
0x2a97
// 0.125120
0x3001
// 0.120732
0x2fba
// 0.020651
0x2549
// 0.144559
0x30a0
// 0.511255
0x3817
// 0.344187
0x3582
// 0.115065
0x2f5d
// 0.068302
0x2c5f
// 0.205347
0x3292
// 0.160452
0x3122
// 0.065289
0x2c2e
// 0.127455
0x3014
// 0.248741
0x33f6
// 0.009349
0x20c9
// 0.100586
0x2e70
// 0.102405
0x2e8e
// 0.188184
0x3206
// 0.192786
0x322b
// 0.160000
0x311f
// 0.027773
0x271c
// 0.112430
0x2f32
// 0.001022
0x142f
// 0.114814
0x2f59
// 0.056143
0x2b30
// 0.122610
0x2fd9
// 0.165753
0x314e
// 0.135443
0x3056
// 0.019103
0x24e4
// 0.028083
0x2730
// 0.013344
0x22d5
// 0.165378
0x314b
// 0.075133
0x2ccf
// 0.056506
0x2b3c
// 0.005057
0x1d2e
// 0.157446
0x310a
// 0.426391
0x36d2
// 0.165223
0x314a
// 0.408385
0x3689
// 0.017004
0x245a
// 0.296335
0x34be
// 0.005781
0x1deb
// 0.076763
0x2cea
// 0.223549
0x3327
// 0.055634
0x2b1f
// 0.222075
0x331b
// 0.102857
0x2e95

@ -0,0 +1,152 @@
H
75
// 0.475515
0x379c
// 0.270052
0x3452
// 0.254433
0x3412
// 0.055055
0x2b0c
// 0.202411
0x327a
// 0.124266
0x2ff4
// 0.046322
0x29ee
// 0.178920
0x31ba
// 0.078939
0x2d0d
// 0.111714
0x2f26
// 0.202373
0x327a
// 0.187127
0x31fd
// 0.154401
0x30f1
// 0.054258
0x2af2
// 0.123123
0x2fe1
// 0.099707
0x2e62
// 0.011955
0x221f
// 0.166787
0x3156
// 0.088390
0x2da8
// 0.114251
0x2f50
// 0.070847
0x2c89
// 0.118218
0x2f91
// 0.057549
0x2b5e
// 0.143263
0x3096
// 0.031234
0x27ff
// 0.169478
0x316c
// 0.045658
0x29d8
// 0.170969
0x3179
// 0.011664
0x21f9
// 0.043150
0x2986
// 0.056999
0x2b4c
// 0.080973
0x2d2f
// 0.301702
0x34d4
// 0.375561
0x3602
// 0.322737
0x352a
// 0.069801
0x2c78
// 0.234393
0x3380
// 0.188541
0x3209
// 0.233770
0x337b
// 0.089282
0x2db7
// 0.019572
0x2503
// 0.075178
0x2cd0
// 0.089462
0x2dba
// 0.116452
0x2f74
// 0.111320
0x2f20
// 0.215334
0x32e4
// 0.119770
0x2faa
// 0.058668
0x2b82
// 0.204849
0x328e
// 0.009485
0x20db
// 0.158639
0x3114
// 0.005484
0x1d9e
// 0.137894
0x306a
// 0.154182
0x30ef
// 0.008037
0x201d
// 0.040326
0x2929
// 0.038281
0x28e6
// 0.082787
0x2d4c
// 0.046186
0x29e9
// 0.118893
0x2f9c
// 0.034195
0x2860
// 0.090605
0x2dcc
// 0.156341
0x3101
// 0.092273
0x2de8
// 0.578286
0x38a0
// 0.049254
0x2a4e
// 0.372461
0x35f6
// 0.021948
0x259e
// 0.059545
0x2b9f
// 0.138834
0x3071
// 0.220685
0x3310
// 0.062460
0x2bff
// 0.201480
0x3273
// 0.087765
0x2d9e
// 0.207282
0x32a2

@ -0,0 +1,152 @@
H
75
// 0.066000
0x2c39
// 0.029316
0x2781
// 0.904684
0x3b3d
// 0.055167
0x2b10
// 0.210490
0x32bc
// 0.148163
0x30be
// 0.189833
0x3213
// 0.038896
0x28fb
// 0.138513
0x306f
// 0.022802
0x25d6
// 0.196137
0x3247
// 0.076794
0x2cea
// 0.010108
0x212d
// 0.220958
0x3312
// 0.129726
0x3027
// 0.099061
0x2e57
// 0.136545
0x305f
// 0.232233
0x336e
// 0.014392
0x235e
// 0.080182
0x2d22
// 0.004332
0x1c70
// 0.083574
0x2d59
// 0.080585
0x2d28
// 0.117765
0x2f89
// 0.162459
0x3133
// 0.019641
0x2507
// 0.075399
0x2cd3
// 0.144758
0x30a2
// 0.064852
0x2c27
// 0.130529
0x302d
// 0.026411
0x26c3
// 0.089694
0x2dbe
// 0.345746
0x3588
// 0.286879
0x3497
// 0.367375
0x35e1
// 0.099391
0x2e5c
// 0.081898
0x2d3e
// 0.075065
0x2cce
// 0.179149
0x31bc
// 0.135742
0x3058
// 0.071606
0x2c95
// 0.174743
0x3197
// 0.182405
0x31d6
// 0.067165
0x2c4c
// 0.071465
0x2c93
// 0.128327
0x301b
// 0.219746
0x3308
// 0.104638
0x2eb2
// 0.145188
0x30a5
// 0.010493
0x215f
// 0.166402
0x3153
// 0.086577
0x2d8a
// 0.134145
0x304b
// 0.007296
0x1f79
// 0.070782
0x2c88
// 0.104541
0x2eb1
// 0.119510
0x2fa6
// 0.109169
0x2efd
// 0.070861
0x2c89
// 0.052457
0x2ab7
// 0.013363
0x22d7
// 0.067024
0x2c4a
// 0.142142
0x308c
// 0.108710
0x2ef5
// 0.182145
0x31d4
// 0.359778
0x35c2
// 0.458077
0x3754
// 0.166456
0x3154
// 0.192489
0x3229
// 0.161820
0x312e
// 0.183137
0x31dc
// 0.038873
0x28fa
// 0.103665
0x2ea2
// 0.103497
0x2ea0
// 0.050063
0x2a68

@ -0,0 +1,152 @@
H
75
// -0.959212
0xbbac
// -0.670324
0xb95d
// -2.232477
0xc077
// -1.387737
0xbd8d
// -4.329000
0xc454
// -1.806354
0xbf3a
// -2.749885
0xc180
// -1.850150
0xbf67
// -1.610971
0xbe72
// -3.493034
0xc2fc
// -2.050449
0xc01a
// -2.407527
0xc0d1
// -2.544094
0xc117
// -2.448739
0xc0e6
// -1.799126
0xbf32
// -1.731562
0xbeed
// -1.709799
0xbed7
// -4.335314
0xc456
// -2.254958
0xc083
// -2.192057
0xc062
// -1.938937
0xbfc1
// -2.430444
0xc0dc
// -4.417635
0xc46b
// -2.415578
0xc0d5
// -2.538128
0xc114
// -3.305833
0xc29d
// -3.357861
0xc2b7
// -2.316667
0xc0a2
// -2.270840
0xc08b
// -2.387285
0xc0c6
// -2.154695
0xc04f
// -2.140212
0xc048
// -1.657703
0xbea1
// -0.645126
0xb929
// -1.248875
0xbcff
// -1.603551
0xbe6a
// -2.633577
0xc144
// -1.835578
0xbf58
// -2.357818
0xc0b7
// -3.324743
0xc2a6
// -1.648598
0xbe98
// -2.037842
0xc013
// -2.109987
0xc038
// -1.195792
0xbcc8
// -2.659356
0xc152
// -1.980175
0xbfec
// -2.161604
0xc053
// -1.794321
0xbf2d
// -2.246154
0xc07e
// -2.408418
0xc0d1
// -3.953591
0xc3e8
// -6.752190
0xc6c1
// -2.878867
0xc1c2
// -2.157113
0xc050
// -2.660457
0xc152
// -2.162948
0xc053
// -3.034069
0xc211
// -2.095012
0xc031
// -4.019581
0xc405
// -1.844349
0xbf61
// -2.773310
0xc18c
// -2.257637
0xc084
// -4.031211
0xc408
// -2.102431
0xc034
// -1.073346
0xbc4b
// -0.878225
0xbb07
// -1.408060
0xbda2
// -2.406489
0xc0d0
// -1.872983
0xbf7e
// -2.021809
0xc00b
// -1.781448
0xbf20
// -1.769868
0xbf14
// -1.823862
0xbf4c
// -2.119634
0xc03d
// -4.550491
0xc48d

@ -0,0 +1,152 @@
H
75
// 0.320653
0x3521
// 0.199694
0x3264
// 0.479652
0x37ad
// 0.102236
0x2e8b
// 0.105106
0x2eba
// 0.201341
0x3271
// 0.132813
0x3040
// 0.088571
0x2dab
// 0.058607
0x2b80
// 0.249883
0x33ff
// 0.061443
0x2bdd
// 0.071180
0x2c8e
// 0.107576
0x2ee3
// 0.057274
0x2b55
// 0.105606
0x2ec2
// 0.152027
0x30dd
// 0.103325
0x2e9d
// 0.049270
0x2a4e
// 0.220453
0x330e
// 0.133290
0x3044
// 0.105547
0x2ec1
// 0.084060
0x2d61
// 0.137416
0x3066
// 0.030274
0x27c0
// 0.048675
0x2a3b
// 0.127130
0x3011
// 0.046091
0x29e6
// 0.066165
0x2c3c
// 0.067586
0x2c53
// 0.097630
0x2e40
// 0.072033
0x2c9c
// 0.117393
0x2f83
// 0.453053
0x3740
// 0.081985
0x2d3f
// 0.464962
0x3770
// 0.052770
0x2ac1
// 0.131843
0x3038
// 0.057721
0x2b63
// 0.152657
0x30e3
// 0.210732
0x32be
// 0.038870
0x28fa
// 0.265057
0x343e
// 0.090349
0x2dc8
// 0.153684
0x30eb
// 0.057884
0x2b69
// 0.057020
0x2b4c
// 0.071140
0x2c8e
// 0.261764
0x3430
// 0.015038
0x23b3
// 0.081591
0x2d39
// 0.262766
0x3434
// 0.039113
0x2902
// 0.156950
0x3106
// 0.095202
0x2e18
// 0.025249
0x2677
// 0.037328
0x28c7
// 0.083821
0x2d5d
// 0.104429
0x2eaf
// 0.087592
0x2d9b
// 0.114188
0x2f4f
// 0.056203
0x2b32
// 0.004401
0x1c82
// 0.095480
0x2e1c
// 0.139157
0x3074
// 0.294734
0x34b7
// 0.268507
0x344c
// 0.436759
0x36fd
// 0.009448
0x20d6
// 0.081745
0x2d3b
// 0.168340
0x3163
// 0.148550
0x30c1
// 0.136539
0x305f
// 0.036541
0x28ad
// 0.253112
0x340d
// 0.165724
0x314e

@ -0,0 +1,152 @@
H
75
// -0.550708
0xb868
// -1.086194
0xbc58
// -2.430874
0xc0dd
// -2.189178
0xc061
// -1.915799
0xbfaa
// -1.850925
0xbf67
// -1.791474
0xbf2a
// -1.795864
0xbf2f
// -1.805296
0xbf39
// -3.914507
0xc3d4
// -2.609097
0xc138
// -2.791088
0xc195
// -1.657411
0xbea1
// -3.818442
0xc3a3
// -2.085128
0xc02c
// -1.898179
0xbf98
// -5.553102
0xc58e
// -1.827261
0xbf4f
// -2.342852
0xc0b0
// -1.613762
0xbe74
// -2.609171
0xc138
// -1.941609
0xbfc4
// -3.429489
0xc2dc
// -3.528022
0xc30e
// -2.070397
0xc024
// -1.926029
0xbfb4
// -3.317714
0xc2a3
// -2.238273
0xc07a
// -1.737780
0xbef3
// -3.121640
0xc23e
// -4.527533
0xc487
// -2.446193
0xc0e4
// -2.637576
0xc146
// -1.176571
0xbcb5
// -0.474604
0xb798
// -3.033151
0xc211
// -1.835852
0xbf58
// -1.691092
0xbec4
// -1.805705
0xbf39
// -2.648123
0xc14c
// -2.160886
0xc052
// -2.461351
0xc0ec
// -1.718385
0xbee0
// -3.930567
0xc3dc
// -2.625716
0xc140
// -1.569582
0xbe47
// -1.492665
0xbdf8
// -2.078453
0xc028
// -2.391468
0xc0c8
// -3.195368
0xc264
// -2.475005
0xc0f3
// -1.956577
0xbfd4
// -2.960791
0xc1ec
// -2.717495
0xc16f
// -2.590550
0xc12e
// -2.850811
0xc1b4
// -2.901796
0xc1ce
// -2.290420
0xc095
// -2.218488
0xc070
// -2.282453
0xc091
// -2.319957
0xc0a4
// -2.432125
0xc0dd
// -2.239578
0xc07b
// -2.294428
0xc097
// -0.590827
0xb8ba
// -1.218422
0xbce0
// -1.881018
0xbf86
// -1.877314
0xbf82
// -2.526120
0xc10d
// -1.792010
0xbf2b
// -1.845023
0xbf61
// -1.752139
0xbf02
// -2.295159
0xc097
// -3.509155
0xc305
// -1.928857
0xbfb7

@ -0,0 +1,22 @@
H
10
// 1.038650
0x3c28
// 1.946771
0x3fc9
// 2.139348
0x4047
// 2.343983
0x40b0
// 0.989680
0x3beb
// 1.881268
0x3f86
// 1.990067
0x3ff6
// 2.193480
0x4063
// 1.026660
0x3c1b
// 1.720281
0x3ee2

@ -0,0 +1,22 @@
H
10
// 0.413470
0x369e
// 0.394708
0x3651
// 0.582607
0x38a9
// 0.396809
0x3659
// 0.179323
0x31bd
// 0.111712
0x2f26
// 0.515907
0x3821
// 0.325282
0x3534
// 0.039444
0x290c
// 0.581119
0x38a6

@ -0,0 +1,22 @@
H
10
// 1.437131
0x3dc0
// 2.206211
0x406a
// 2.309694
0x409f
// 2.569599
0x4124
// 1.432431
0x3dbb
// 2.207405
0x406a
// 2.310898
0x409f
// 2.569412
0x4124
// 1.455066
0x3dd2
// 2.207080
0x406a

@ -0,0 +1,22 @@
H
10
// -0.908800
0xbb45
// -1.999736
0xc000
// -2.257576
0xc084
// -2.442263
0xc0e2
// -1.038944
0xbc28
// -2.090152
0xc02e
// -2.287087
0xc093
// -2.463247
0xc0ed
// -1.051243
0xbc34
// -2.074013
0xc026

@ -0,0 +1,243 @@
#include "FastMathF16.h"
#include <stdio.h>
#include "Error.h"
#include "arm_vec_math_f16.h"
#include "Test.h"
#define SNR_THRESHOLD 60
#define SNR_LOG_THRESHOLD 40
/*
Reference patterns are generated with
a double precision computation.
*/
#define REL_ERROR (1.0e-3)
#define ABS_ERROR (1.0e-3)
#define REL_LOG_ERROR (3.0e-2)
#define ABS_LOG_ERROR (3.0e-2)
#if 0
void FastMathF16::test_cos_f16()
{
const float16_t *inp = input.ptr();
float16_t *outp = output.ptr();
unsigned long i;
for(i=0; i < ref.nbSamples(); i++)
{
outp[i]=arm_cos_f16(inp[i]);
}
ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
}
void FastMathF16::test_sin_f16()
{
const float16_t *inp = input.ptr();
float16_t *outp = output.ptr();
unsigned long i;
for(i=0; i < ref.nbSamples(); i++)
{
outp[i]=arm_sin_f16(inp[i]);
}
ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
}
#endif
void FastMathF16::test_sqrt_f16()
{
const float16_t *inp = input.ptr();
float16_t *outp = output.ptr();
arm_status status;
unsigned long i;
for(i=0; i < ref.nbSamples(); i++)
{
status=arm_sqrt_f16(inp[i],&outp[i]);
ASSERT_TRUE((status == ARM_MATH_SUCCESS) || ((inp[i] < 0.0f) && (status == ARM_MATH_ARGUMENT_ERROR)));
}
ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
}
void FastMathF16::test_vlog_f16()
{
const float16_t *inp = input.ptr();
float16_t *outp = output.ptr();
arm_vlog_f16(inp,outp,ref.nbSamples());
ASSERT_SNR(ref,output,(float16_t)SNR_LOG_THRESHOLD);
ASSERT_CLOSE_ERROR(ref,output,ABS_LOG_ERROR,REL_LOG_ERROR);
ASSERT_EMPTY_TAIL(output);
}
void FastMathF16::test_vexp_f16()
{
const float16_t *inp = input.ptr();
float16_t *outp = output.ptr();
arm_vexp_f16(inp,outp,ref.nbSamples());
ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
ASSERT_EMPTY_TAIL(output);
}
void FastMathF16::test_inverse_f16()
{
const float16_t *inp = input.ptr();
float16_t *outp = output.ptr();
arm_vinverse_f16(inp,outp,ref.nbSamples());
ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);
ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
ASSERT_EMPTY_TAIL(output);
}
/*
Prepares the patterns needed by the test identified by id.

For every handled test the input and reference patterns are reloaded
through mgr, and the output pattern is created with ref.nbSamples()
samples. The vlog and vexp tests exist in four variants: one reloading
the patterns without an explicit count, and three passing 7, 16 and 23
as the third reload argument (presumably the number of samples to keep -
confirm against the pattern API).

paramsArgs is not used by this suite.

IDs that are not handled (including the cos/sin tests, which are compiled
out with #if 0) leave the patterns untouched.
*/
void FastMathF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
{
    (void)paramsArgs;

    switch(id)
    {
#if 0
        case FastMathF16::TEST_COS_F16_1:
        {
            input.reload(FastMathF16::ANGLES1_F16_ID,mgr);
            ref.reload(FastMathF16::COS1_F16_ID,mgr);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        case FastMathF16::TEST_SIN_F16_2:
        {
            input.reload(FastMathF16::ANGLES1_F16_ID,mgr);
            ref.reload(FastMathF16::SIN1_F16_ID,mgr);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;
#endif

        case FastMathF16::TEST_SQRT_F16_3:
        {
            input.reload(FastMathF16::SQRTINPUT1_F16_ID,mgr);
            ref.reload(FastMathF16::SQRT1_F16_ID,mgr);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        /* Natural logarithm: default count, then 7, 16 and 23. */
        case FastMathF16::TEST_VLOG_F16_4:
        {
            input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr);
            ref.reload(FastMathF16::LOG1_F16_ID,mgr);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        case FastMathF16::TEST_VLOG_F16_5:
        {
            input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr,7);
            ref.reload(FastMathF16::LOG1_F16_ID,mgr,7);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        case FastMathF16::TEST_VLOG_F16_6:
        {
            input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr,16);
            ref.reload(FastMathF16::LOG1_F16_ID,mgr,16);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        case FastMathF16::TEST_VLOG_F16_7:
        {
            input.reload(FastMathF16::LOGINPUT1_F16_ID,mgr,23);
            ref.reload(FastMathF16::LOG1_F16_ID,mgr,23);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        /* Exponential: default count, then 7, 16 and 23. */
        case FastMathF16::TEST_VEXP_F16_8:
        {
            input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr);
            ref.reload(FastMathF16::EXP1_F16_ID,mgr);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        case FastMathF16::TEST_VEXP_F16_9:
        {
            input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr,7);
            ref.reload(FastMathF16::EXP1_F16_ID,mgr,7);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        case FastMathF16::TEST_VEXP_F16_10:
        {
            input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr,16);
            ref.reload(FastMathF16::EXP1_F16_ID,mgr,16);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        case FastMathF16::TEST_VEXP_F16_11:
        {
            input.reload(FastMathF16::EXPINPUT1_F16_ID,mgr,23);
            ref.reload(FastMathF16::EXP1_F16_ID,mgr,23);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        case FastMathF16::TEST_INVERSE_F16_12:
        {
            input.reload(FastMathF16::INPUT1_F16_ID,mgr);
            ref.reload(FastMathF16::INVERSE1_F16_ID,mgr);
            output.create(ref.nbSamples(),FastMathF16::OUT_F16_ID,mgr);
        }
        break;

        default:
            /* Unhandled or disabled test ID: nothing to load. */
            break;
    }
}
/*
Dumps the output pattern through mgr once a test has run.
The test identifier is not needed here.
*/
void FastMathF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
{
    output.dump(mgr);
    (void)id;
}

@ -4,14 +4,18 @@
#include "Test.h"
#define SNR_THRESHOLD 50
#define SNR_THRESHOLD 48
#define SNR_KULLBACK_THRESHOLD 40
/*
Reference patterns are generated with
a double precision computation.
*/
#define REL_ERROR (3.0e-3)
#define REL_ERROR (6.0e-3)
#define REL_KULLBACK_ERROR (5.0e-3)
#define ABS_KULLBACK_ERROR (5.0e-3)
void StatsTestsF16::test_max_f16()
{
@ -39,7 +43,7 @@ a double precision computation.
}
#if 0
void StatsTestsF16::test_max_no_idx_f16()
{
const float16_t *inp = inputA.ptr();
@ -59,7 +63,7 @@ a double precision computation.
ASSERT_EQ(result,refp[this->refOffset]);
}
#endif
void StatsTestsF16::test_min_f16()
{
@ -241,7 +245,7 @@ a double precision computation.
}
#if 0
void StatsTestsF16::test_entropy_f16()
{
const float16_t *inp = inputA.ptr();
@ -298,9 +302,9 @@ a double precision computation.
inpB += dimsp[i+1];
}
ASSERT_SNR(ref,output,(float16_t)SNR_THRESHOLD);
ASSERT_SNR(ref,output,(float16_t)SNR_KULLBACK_THRESHOLD);
ASSERT_REL_ERROR(ref,output,REL_ERROR);
ASSERT_CLOSE_ERROR(ref,output,ABS_KULLBACK_ERROR,REL_KULLBACK_ERROR);
}
void StatsTestsF16::test_logsumexp_dot_prod_f16()
@ -325,7 +329,7 @@ a double precision computation.
ASSERT_REL_ERROR(ref,output,REL_ERROR);
}
#endif
void StatsTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
{
@ -595,7 +599,7 @@ a double precision computation.
refOffset = 2;
}
break;
#if 0
case StatsTestsF16::TEST_ENTROPY_F16_22:
{
inputA.reload(StatsTestsF16::INPUT22_F16_ID,mgr);
@ -651,7 +655,7 @@ a double precision computation.
case StatsTestsF16::TEST_MAX_NO_IDX_F16_26:
{
inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,3);
inputA.reload(StatsTestsF16::INPUT1_F16_ID,mgr,7);
ref.reload(StatsTestsF16::MAXVALS_F16_ID,mgr);
@ -684,7 +688,7 @@ a double precision computation.
refOffset = 2;
}
break;
#endif
case TEST_MEAN_F16_29:
inputA.reload(StatsTestsF16::INPUT2_F16_ID,mgr,100);

@ -25,23 +25,23 @@ group Root {
Pattern STDVALS_F16_ID : StdVals6_f16.txt
Pattern VARVALS_F16_ID : VarVals7_f16.txt
//Pattern INPUT22_F16_ID : Input22_f16.txt
//Pattern DIM22_S16_ID : Dims22_s16.txt
//Pattern REF22_ENTROPY_F16_ID : RefEntropy22_f16.txt
//
//Pattern INPUT23_F16_ID : Input23_f16.txt
//Pattern DIM23_S16_ID : Dims23_s16.txt
//Pattern REF23_LOGSUMEXP_F16_ID : RefLogSumExp23_f16.txt
//
//Pattern INPUTA24_F16_ID : InputA24_f16.txt
//Pattern INPUTB24_F16_ID : InputB24_f16.txt
//Pattern DIM24_S16_ID : Dims24_s16.txt
//Pattern REF24_KL_F16_ID : RefKL24_f16.txt
//
//Pattern INPUTA25_F16_ID : InputA25_f16.txt
//Pattern INPUTB25_F16_ID : InputB25_f16.txt
//Pattern DIM25_S16_ID : Dims25_s16.txt
//Pattern REF25_LOGSUMEXP_DOT_F16_ID : RefLogSumExpDot25_f16.txt
Pattern INPUT22_F16_ID : Input22_f16.txt
Pattern DIM22_S16_ID : Dims22_s16.txt
Pattern REF22_ENTROPY_F16_ID : RefEntropy22_f16.txt
Pattern INPUT23_F16_ID : Input23_f16.txt
Pattern DIM23_S16_ID : Dims23_s16.txt
Pattern REF23_LOGSUMEXP_F16_ID : RefLogSumExp23_f16.txt
Pattern INPUTA24_F16_ID : InputA24_f16.txt
Pattern INPUTB24_F16_ID : InputB24_f16.txt
Pattern DIM24_S16_ID : Dims24_s16.txt
Pattern REF24_KL_F16_ID : RefKL24_f16.txt
Pattern INPUTA25_F16_ID : InputA25_f16.txt
Pattern INPUTB25_F16_ID : InputB25_f16.txt
Pattern DIM25_S16_ID : Dims25_s16.txt
Pattern REF25_LOGSUMEXP_DOT_F16_ID : RefLogSumExpDot25_f16.txt
Output OUT_F16_ID : Output
Output OUT_S16_ID : Index
@ -76,14 +76,14 @@ group Root {
Test nb=8n arm_var_f16:test_var_f16
Test nb=8n+1 arm_var_f16:test_var_f16
disabled{arm_entropy_f16:test_entropy_f16}
disabled{arm_logsumexp_f16:test_logsumexp_f16}
disabled{arm_kullback_leibler_f16:test_kullback_leibler_f16}
disabled{arm_logsumexp_dot_prod_f16:test_logsumexp_dot_prod_f16}
arm_entropy_f16:test_entropy_f16
arm_logsumexp_f16:test_logsumexp_f16
arm_kullback_leibler_f16:test_kullback_leibler_f16
arm_logsumexp_dot_prod_f16:test_logsumexp_dot_prod_f16
disabled{Test nb=7 arm_max_no_idx_f16:test_max_no_idx_f16}
disabled{Test nb=8n arm_max_no_idx_f16:test_max_no_idx_f16}
disabled{Test nb=8n+1 arm_max_no_idx_f16:test_max_no_idx_f16}
Test nb=7 arm_max_no_idx_f16:test_max_no_idx_f16
Test nb=8n arm_max_no_idx_f16:test_max_no_idx_f16
Test nb=8n+1 arm_max_no_idx_f16:test_max_no_idx_f16
Test long arm_mean_f16:test_mean_f16
Test long arm_rms_f16:test_rms_f16
@ -342,6 +342,46 @@ group Root {
}
group Fast Maths Tests {
class = FastMath
folder = FastMath
suite Fast Maths F16 {
class = FastMathF16
folder = FastMathF16
Pattern ANGLES1_F16_ID : Angles1_f16.txt
Pattern SQRTINPUT1_F16_ID : SqrtInput1_f16.txt
Pattern LOGINPUT1_F16_ID : LogInput1_f16.txt
Pattern EXPINPUT1_F16_ID : ExpInput1_f16.txt
Pattern INPUT1_F16_ID : Samples1_f16.txt
Pattern COS1_F16_ID : Cos1_f16.txt
Pattern SIN1_F16_ID : Sin1_f16.txt
Pattern SQRT1_F16_ID : Sqrt1_f16.txt
Pattern LOG1_F16_ID : Log1_f16.txt
Pattern EXP1_F16_ID : Exp1_f16.txt
Pattern INVERSE1_F16_ID : Inverse1_f16.txt
// NOTE(review): the cos and sin entries below are disabled, matching the
// "#if 0" around test_cos_f16/test_sin_f16 in FastMathF16.cpp.
// NOTE(review): FastMathF16::setUp reloads the vlog/vexp patterns with 7, 16
// and 23 samples for its three sized variants. The "nb=3", "nb=4n" and
// "nb=4n+1" labels below presumably name those variants and do not match
// these counts - confirm and relabel if needed.
Output OUT_F16_ID : Output
Functions {
disabled{test_cos_f16:test_cos_f16}
disabled{test_sin_f16:test_sin_f16}
test_sqrt_f16:test_sqrt_f16
test_vlog_f16:test_vlog_f16
test_vlog_f16 nb=3:test_vlog_f16
test_vlog_f16 nb=4n:test_vlog_f16
test_vlog_f16 nb=4n+1:test_vlog_f16
test_vexp_f16:test_vexp_f16
test_vexp_f16 nb=3:test_vexp_f16
test_vexp_f16 nb=4n:test_vexp_f16
test_vexp_f16 nb=4n+1:test_vexp_f16
test_inverse_f16:test_inverse_f16
}
}
}
group Filtering Tests {
class = FilteringTests
folder = Filtering

Loading…
Cancel
Save