From f4a19826afbdf0f76d07cd497b06a2e2c7f5829b Mon Sep 17 00:00:00 2001
From: Silfurion
Date: Thu, 11 Aug 2022 09:50:31 +0200
Subject: [PATCH] Updating missing file 1

---
 ComputeLibrary/Include/NEMath.h           | 99 +++++++++++++++++++++++
 ComputeLibrary/Source/arm_cl_tables.c     | 24 ++++++
 Source/MatrixFunctions/MatrixFunctions.c  | 3 +-
 Source/MatrixFunctions/arm_mat_init_f64.c | 76 +++++++++++++++++
 4 files changed, 201 insertions(+), 1 deletion(-)
 create mode 100644 Source/MatrixFunctions/arm_mat_init_f64.c

diff --git a/ComputeLibrary/Include/NEMath.h b/ComputeLibrary/Include/NEMath.h
index 29838bdf..30b9fc2e 100755
--- a/ComputeLibrary/Include/NEMath.h
+++ b/ComputeLibrary/Include/NEMath.h
@@ -26,6 +26,40 @@
 
 #if defined(ARM_MATH_NEON)
 
+
+
+/** Calculate logarithm
+ *
+ * @param[in] x Input vector value in F64 format.
+ *
+ * @return The calculated logarithm.
+ */
+static inline float64x2_t vlogq_f64(float64x2_t x);
+/** Calculate exponential
+ *
+ * @param[in] x Input vector value in F64 format.
+ *
+ * @return The calculated exponent.
+ */
+static inline float64x2_t vexpq_f64(float64x2_t x);
+
+/** Perform a 7th degree polynomial approximation using Estrin's method.
+ *
+ * @param[in] x Input vector value in F64 format.
+ * @param[in] coeffs Polynomial coefficients table. (array of flattened float64x2_t vectors)
+ *
+ * @return The calculated approximation.
+ */
+static inline float64x2_t vtaylor_polyq_f64(float64x2_t x, const float64_t *coeffs);
+
+/** Calculate reciprocal.
+ *
+ * @param[in] x Input vector value in F64 format.
+ *
+ * @return The calculated reciprocal.
+ */
+static inline float64x2_t vinvq_f64(float64x2_t x);
+
 /** Calculate floor of a vector.
  *
  * @param[in] val Input vector value in F32 format.
@@ -182,10 +216,14 @@ static inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n);
 
 /** Exponent polynomial coefficients */
 extern const float32_t exp_tab[4*8];
+extern const float64_t exp_tab_64[2*8];
+
 
 /** Logarithm polynomial coefficients */
 extern const float32_t log_tab[4*8];
 
+extern const float64_t log_tab_64[2*8];
+
 #ifndef DOXYGEN_SKIP_THIS
 inline float32x4_t vfloorq_f32(float32x4_t val)
 {
@@ -231,6 +269,14 @@ inline float32x4_t vinvq_f32(float32x4_t x)
     return recip;
 }
 
+inline float64x2_t vinvq_f64(float64x2_t x)
+{
+    float64x2_t recip = vrecpeq_f64(x); // initial reciprocal estimate
+    recip = vmulq_f64(vrecpsq_f64(x, recip), recip); // refinement step 1: recip *= vrecpsq_f64(x, recip)
+    recip = vmulq_f64(vrecpsq_f64(x, recip), recip); // refinement step 2; NOTE(review): confirm two steps give enough precision for f64
+    return recip;
+}
+
 inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)
 {
     float32x4_t A = vmlaq_f32(vld1q_f32(&coeffs[4*0]), vld1q_f32(&coeffs[4*4]), x);
@@ -243,6 +289,18 @@ inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)
     return res;
 }
 
+inline float64x2_t vtaylor_polyq_f64(float64x2_t x, const float64_t *coeffs)
+{
+    float64x2_t A = vmlaq_f64(vld1q_f64(&coeffs[2*0]), vld1q_f64(&coeffs[2*4]), x); // A = c0 + c4*x (cN = table row N, one value per lane)
+    float64x2_t B = vmlaq_f64(vld1q_f64(&coeffs[2*2]), vld1q_f64(&coeffs[2*6]), x); // B = c2 + c6*x
+    float64x2_t C = vmlaq_f64(vld1q_f64(&coeffs[2*1]), vld1q_f64(&coeffs[2*5]), x); // C = c1 + c5*x
+    float64x2_t D = vmlaq_f64(vld1q_f64(&coeffs[2*3]), vld1q_f64(&coeffs[2*7]), x); // D = c3 + c7*x
+    float64x2_t x2 = vmulq_f64(x, x);
+    float64x2_t x4 = vmulq_f64(x2, x2);
+    float64x2_t res = vmlaq_f64(vmlaq_f64(A, B, x2), vmlaq_f64(C, D, x2), x4); // (A + B*x^2) + (C + D*x^2)*x^4
+    return res;
+}
+
 inline float32x4_t vexpq_f32(float32x4_t x)
 {
     static const float32_t CONST_LN2[4] = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)
@@ -261,6 +319,28 @@ inline float32x4_t vexpq_f32(float32x4_t x)
     poly = vreinterpretq_f32_s32(vqaddq_s32(vreinterpretq_s32_f32(poly), vqshlq_n_s32(m, 23)));
     poly = vbslq_f32(vcltq_s32(m, vld1q_s32(CONST_NEGATIVE_126)), vld1q_f32(CONST_0), poly);
 
+
+    return poly;
+}
+
+inline float64x2_t vexpq_f64(float64x2_t x)
+{
+
+    static const float64_t CONST_LN2[2] = {0.6931471805599453, 0.6931471805599453}; // ln(2), double-precision literal (no 'f' suffix)
+    static const float64_t CONST_INV_LN2[2] = {1.4426950408889634, 1.4426950408889634}; // 1/ln(2), double-precision literal (no 'f' suffix)
+    static const float64_t CONST_0[2] = {0.0, 0.0};
+    static const int64_t CONST_NEGATIVE_1022[2] = {-1022,-1022}; // lanes with m below this value are forced to 0
+
+    // Range reduction: m = (int64)(x * 1/ln(2)), val = x - m * ln(2)
+    int64x2_t m = vcvtq_s64_f64(vmulq_f64(x, vld1q_f64(CONST_INV_LN2)));
+    float64x2_t val = vmlsq_f64(x, vcvtq_f64_s64(m), vld1q_f64(CONST_LN2));
+
+    // Polynomial approximation evaluated on the reduced value
+    float64x2_t poly = vtaylor_polyq_f64(val, exp_tab_64);
+
+    // Reconstruct: add (m << 52) to the bit pattern of poly, then select 0 where m < -1022
+    poly = vreinterpretq_f64_s64(vqaddq_s64(vreinterpretq_s64_f64(poly), vqshlq_n_s64(m,52)));
+    poly = vbslq_f64(vcltq_s64(m, vld1q_s64(CONST_NEGATIVE_1022)), vld1q_f64(CONST_0), poly);
     return poly;
 }
 
@@ -282,6 +362,25 @@ inline float32x4_t vlogq_f32(float32x4_t x)
     return poly;
 }
 
+inline float64x2_t vlogq_f64(float64x2_t x)
+{
+    static const int64_t CONST_1023[2] = {1023,1023}; // 1023, subtracted from the bits shifted down by 52
+    static const float64_t CONST_LN2[2] = {0.6931471805599453, 0.6931471805599453}; // ln(2), double-precision literal (no 'f' suffix)
+
+    // Extract exponent: m = (bits(x) >> 52) - 1023; val = x with (m << 52) subtracted from its bit pattern
+    int64x2_t m = vsubq_s64(vreinterpretq_s64_u64(vshrq_n_u64(vreinterpretq_u64_f64(x), 52)), vld1q_s64(CONST_1023));
+    float64x2_t val = vreinterpretq_f64_s64(vsubq_s64(vreinterpretq_s64_f64(x), vshlq_n_s64(m, 52)));
+
+    // Polynomial approximation evaluated on val
+    float64x2_t poly = vtaylor_polyq_f64(val, log_tab_64);
+
+    // Reconstruct: poly + m * ln(2)
+    poly = vmlaq_f64(poly, vcvtq_f64_s64(m), vld1q_f64(CONST_LN2));
+
+    return poly;
+}
+
+
 inline float32x4_t vtanhq_f32(float32x4_t val)
 {
     static const float32_t CONST_1[4] = {1.f,1.f,1.f,1.f};
diff --git a/ComputeLibrary/Source/arm_cl_tables.c b/ComputeLibrary/Source/arm_cl_tables.c
index bdd39a6b..3f1456d4 100755
--- a/ComputeLibrary/Source/arm_cl_tables.c
+++ b/ComputeLibrary/Source/arm_cl_tables.c
@@ -39,6 +39,18 @@ const float32_t exp_tab[4*8] =
     0.000195780929062f,0.000195780929062f,0.000195780929062f,0.000195780929062f
 };
 
+const float64_t exp_tab_64[2*8] = /* one row per coefficient, value repeated for both f64 lanes; NOTE(review): 'f' literals carry only float precision */
+{
+    1.f,1.f, /* row 0: x^0 term */
+    0.0416598916054f,0.0416598916054f, /* row 1: x^4 term */
+    0.500000596046f,0.500000596046f, /* row 2: x^2 term */
+    0.0014122662833f,0.0014122662833f, /* row 3: x^6 term */
+    1.00000011921f,1.00000011921f, /* row 4: x^1 term */
+    0.00833693705499f,0.00833693705499f, /* row 5: x^5 term */
+    0.166665703058f,0.166665703058f, /* row 6: x^3 term */
+    0.000195780929062f,0.000195780929062f /* row 7: x^7 term */
+};
+
 /** Logarithm polynomial coefficients */
 const float32_t log_tab[4*8] =
 {
@@ -52,4 +64,16 @@ const float32_t log_tab[4*8] =
     0.0141278216615f,0.0141278216615f,0.0141278216615f,0.0141278216615f
 };
 
+const float64_t log_tab_64[2*8] = /* one row per coefficient, value repeated for both f64 lanes; NOTE(review): 'f' literals carry only float precision */
+{
+    -2.29561495781f,-2.29561495781f, /* row 0: x^0 term */
+    -2.47071170807f,-2.47071170807f, /* row 1: x^4 term */
+    -5.68692588806f,-5.68692588806f, /* row 2: x^2 term */
+    -0.165253549814f,-0.165253549814f, /* row 3: x^6 term */
+    5.17591238022f,5.17591238022f, /* row 4: x^1 term */
+    0.844007015228f,0.844007015228f, /* row 5: x^5 term */
+    4.58445882797f,4.58445882797f, /* row 6: x^3 term */
+    0.0141278216615f,0.0141278216615f /* row 7: x^7 term */
+};
+
 #endif
diff --git a/Source/MatrixFunctions/MatrixFunctions.c b/Source/MatrixFunctions/MatrixFunctions.c
index 7d7302be..9466a1b0 100644
--- a/Source/MatrixFunctions/MatrixFunctions.c
+++ b/Source/MatrixFunctions/MatrixFunctions.c
@@ -32,6 +32,7 @@
 #include "arm_mat_cmplx_mult_f32.c"
 #include "arm_mat_cmplx_mult_q15.c"
 #include "arm_mat_cmplx_mult_q31.c"
+#include "arm_mat_init_f64.c"
 #include "arm_mat_init_f32.c"
 #include "arm_mat_init_q15.c"
 #include "arm_mat_init_q31.c"
@@ -75,4 +76,4 @@
 #include "arm_mat_qr_f32.c"
 #include "arm_mat_qr_f64.c"
 #include "arm_householder_f64.c"
-#include "arm_householder_f32.c"
\ No newline at end of file
+#include "arm_householder_f32.c"
diff --git a/Source/MatrixFunctions/arm_mat_init_f64.c b/Source/MatrixFunctions/arm_mat_init_f64.c
new file mode 100644
index 00000000..5abe302b
--- /dev/null
+++ b/Source/MatrixFunctions/arm_mat_init_f64.c
@@ -0,0 +1,76 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mat_init_f64.c
+ * Description:  64-bit floating-point matrix initialization
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/matrix_functions.h"
+
+/**
+  @ingroup groupMatrix
+ */
+
+/**
+  @defgroup MatrixInit Matrix Initialization
+
+  Initializes the underlying matrix data structure.
+  The functions set the numRows,
+  numCols, and pData fields
+  of the matrix data structure.
+ */
+
+/**
+  @addtogroup MatrixInit
+  @{
+ */
+
+/**
+  @brief         64-bit floating-point matrix initialization.
+  @param[in,out] S         points to an instance of the 64-bit floating-point matrix structure
+  @param[in]     nRows     number of rows in the matrix
+  @param[in]     nColumns  number of columns in the matrix
+  @param[in]     pData     points to the matrix data array (the pointer is stored in S, the data is not copied)
+  @return        none
+ */
+
+void arm_mat_init_f64(
+  arm_matrix_instance_f64 * S,
+  uint16_t nRows,
+  uint16_t nColumns,
+  float64_t * pData)
+{
+  /* Assign Number of Rows */
+  S->numRows = nRows;
+
+  /* Assign Number of Columns */
+  S->numCols = nColumns;
+
+  /* Assign Data pointer */
+  S->pData = pData;
+}
+
+/**
+  @} end of MatrixInit group
+ */