Updating missing file 1

pull/39/head
Silfurion 3 years ago
parent e5753198af
commit f4a19826af

@ -26,6 +26,40 @@
#if defined(ARM_MATH_NEON)
/** Calculate the natural logarithm of each lane.
*
* @param[in] x Input vector value in F64 format (two lanes).
*
* @return The calculated logarithm, one result per lane.
*/
static inline float64x2_t vlogq_f64(float64x2_t x);
/** Calculate the exponential of each lane.
*
* @param[in] x Input vector value in F64 format (two lanes).
*
* @return The calculated exponent, one result per lane.
*/
static inline float64x2_t vexpq_f64(float64x2_t x);
/** Perform a 7th degree polynomial approximation using Estrin's method.
*
* @param[in] x Input vector value in F64 format (two lanes).
* @param[in] coeffs Polynomial coefficients table. (array of 8 flattened float64x2_t vectors,
*                   i.e. 16 float64_t values; see exp_tab_64 / log_tab_64)
*
* @return The calculated approximation.
*/
static inline float64x2_t vtaylor_polyq_f64(float64x2_t x, const float64_t *coeffs);
/** Calculate the reciprocal of each lane.
*
* @param[in] x Input vector value in F64 format (two lanes).
*
* @return The calculated reciprocal, one result per lane.
*/
static inline float64x2_t vinvq_f64(float64x2_t x);
/** Calculate floor of a vector.
*
* @param[in] val Input vector value in F32 format.
@ -182,10 +216,14 @@ static inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n);
/** Exponent polynomial coefficients.
*
* exp_tab is sized for 8 rows of 4 float32 lanes; exp_tab_64 is sized for
* 8 rows of 2 float64 lanes and is the table passed to vtaylor_polyq_f64
* by vexpq_f64.
*/
extern const float32_t exp_tab[4*8];
extern const float64_t exp_tab_64[2*8];
/** Logarithm polynomial coefficients.
*
* log_tab is sized for 8 rows of 4 float32 lanes; log_tab_64 is sized for
* 8 rows of 2 float64 lanes and is the table passed to vtaylor_polyq_f64
* by vlogq_f64.
*/
extern const float32_t log_tab[4*8];
extern const float64_t log_tab_64[2*8];
#ifndef DOXYGEN_SKIP_THIS
inline float32x4_t vfloorq_f32(float32x4_t val)
{
@ -231,6 +269,14 @@ inline float32x4_t vinvq_f32(float32x4_t x)
return recip;
}
/**
 * Reciprocal of each float64 lane, computed from the hardware reciprocal
 * estimate refined by Newton-Raphson iterations.
 *
 * @param[in] x Input vector (two float64 lanes).
 *
 * @return Approximation of 1/x for each lane.
 */
inline float64x2_t vinvq_f64(float64x2_t x)
{
    /* Hardware reciprocal estimate; Arm documents it as accurate to only
     * about 8 bits, so it must be refined before use. */
    float64x2_t recip = vrecpeq_f64(x);

    /* vrecpsq_f64(x, r) evaluates (2 - x * r); multiplying the result by r is
     * one Newton-Raphson step, which roughly doubles the number of correct
     * bits. Two steps (the float32 recipe) stop at about 32 bits, which is
     * short of the 53-bit float64 significand, so a third step is required. */
    recip = vmulq_f64(vrecpsq_f64(x, recip), recip);
    recip = vmulq_f64(vrecpsq_f64(x, recip), recip);
    recip = vmulq_f64(vrecpsq_f64(x, recip), recip);

    return recip;
}
inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)
{
float32x4_t A = vmlaq_f32(vld1q_f32(&coeffs[4*0]), vld1q_f32(&coeffs[4*4]), x);
@ -243,6 +289,18 @@ inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)
return res;
}
/**
 * Evaluate a 7th degree polynomial on each lane of x with Estrin's scheme.
 *
 * The table stores 8 coefficient vectors of 2 lanes each. Row k (at
 * coeffs[2*k]) multiplies the following power of x:
 *   row 0 -> x^0, row 4 -> x^1, row 2 -> x^2, row 6 -> x^3,
 *   row 1 -> x^4, row 5 -> x^5, row 3 -> x^6, row 7 -> x^7.
 *
 * @param[in] x      Input vector (two float64 lanes).
 * @param[in] coeffs Table of 16 float64_t values laid out as described above.
 *
 * @return The polynomial value for each lane.
 */
inline float64x2_t vtaylor_polyq_f64(float64x2_t x, const float64_t *coeffs)
{
    /* Load the eight coefficient vectors, two doubles per row. */
    const float64x2_t row0 = vld1q_f64(&coeffs[2*0]);
    const float64x2_t row1 = vld1q_f64(&coeffs[2*1]);
    const float64x2_t row2 = vld1q_f64(&coeffs[2*2]);
    const float64x2_t row3 = vld1q_f64(&coeffs[2*3]);
    const float64x2_t row4 = vld1q_f64(&coeffs[2*4]);
    const float64x2_t row5 = vld1q_f64(&coeffs[2*5]);
    const float64x2_t row6 = vld1q_f64(&coeffs[2*6]);
    const float64x2_t row7 = vld1q_f64(&coeffs[2*7]);

    /* Four independent linear terms of the form (a + b * x). */
    const float64x2_t lin01 = vmlaq_f64(row0, row4, x);
    const float64x2_t lin23 = vmlaq_f64(row2, row6, x);
    const float64x2_t lin45 = vmlaq_f64(row1, row5, x);
    const float64x2_t lin67 = vmlaq_f64(row3, row7, x);

    /* Powers of x used to stitch the linear terms together. */
    const float64x2_t x_pow2 = vmulq_f64(x, x);
    const float64x2_t x_pow4 = vmulq_f64(x_pow2, x_pow2);

    /* (lin01 + lin23 * x^2) + (lin45 + lin67 * x^2) * x^4 */
    const float64x2_t low_part  = vmlaq_f64(lin01, lin23, x_pow2);
    const float64x2_t high_part = vmlaq_f64(lin45, lin67, x_pow2);

    return vmlaq_f64(low_part, high_part, x_pow4);
}
inline float32x4_t vexpq_f32(float32x4_t x)
{
static const float32_t CONST_LN2[4] = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)
@ -261,6 +319,28 @@ inline float32x4_t vexpq_f32(float32x4_t x)
poly = vreinterpretq_f32_s32(vqaddq_s32(vreinterpretq_s32_f32(poly), vqshlq_n_s32(m, 23)));
poly = vbslq_f32(vcltq_s32(m, vld1q_s32(CONST_NEGATIVE_126)), vld1q_f32(CONST_0), poly);
return poly;
}
/**
 * Exponential of each float64 lane.
 *
 * The input is split as x = m * ln(2) + val with integer m, exp(val) is
 * approximated with the exp_tab_64 polynomial, and the result is scaled by
 * 2^m by adding m to the exponent field of the result.
 *
 * @param[in] x Input vector (two float64 lanes).
 *
 * @return The calculated exponential for each lane; lanes with m < -1022 are
 *         flushed to 0.
 */
inline float64x2_t vexpq_f64(float64x2_t x)
{
    /* Double-precision literals: an 'f' suffix here would round ln(2) and
     * 1/ln(2) to single precision before widening, and that error is
     * multiplied by m in the range reduction below. */
    static const float64_t CONST_LN2[2]           = {0.6931471805599453, 0.6931471805599453}; // ln(2)
    static const float64_t CONST_INV_LN2[2]       = {1.4426950408889634, 1.4426950408889634}; // 1/ln(2)
    static const float64_t CONST_0[2]             = {0.0, 0.0};
    static const int64_t   CONST_NEGATIVE_1022[2] = {-1022, -1022};

    // Range reduction to [-log(2),log(2)]: m = (int)(x / ln(2)), val = x - m * ln(2)
    int64x2_t   m   = vcvtq_s64_f64(vmulq_f64(x, vld1q_f64(CONST_INV_LN2)));
    float64x2_t val = vmlsq_f64(x, vcvtq_f64_s64(m), vld1q_f64(CONST_LN2));

    // Polynomial Approximation
    float64x2_t poly = vtaylor_polyq_f64(val, exp_tab_64);

    // Reconstruct: adding (m << 52) to the bit pattern adds m to the float64
    // exponent field, i.e. multiplies by 2^m. Saturating ops are used for both
    // the shift and the add.
    poly = vreinterpretq_f64_s64(vqaddq_s64(vreinterpretq_s64_f64(poly), vqshlq_n_s64(m, 52)));

    // Exponents below -1022 are replaced by zero instead of being reconstructed.
    poly = vbslq_f64(vcltq_s64(m, vld1q_s64(CONST_NEGATIVE_1022)), vld1q_f64(CONST_0), poly);

    return poly;
}
@ -282,6 +362,25 @@ inline float32x4_t vlogq_f32(float32x4_t x)
return poly;
}
/**
 * Natural logarithm of each float64 lane.
 *
 * The input is split as x = 2^m * val by reading the exponent field, log(val)
 * is approximated with the log_tab_64 polynomial, and m * ln(2) is added back.
 *
 * NOTE(review): the sign bit is not masked before the exponent is extracted,
 * so the input is presumably expected to be positive -- confirm with callers.
 *
 * @param[in] x Input vector (two float64 lanes).
 *
 * @return The calculated logarithm for each lane.
 */
inline float64x2_t vlogq_f64(float64x2_t x)
{
    static const int64_t   CONST_1023[2] = {1023, 1023}; // float64 exponent bias
    /* Double-precision literal: an 'f' suffix would round ln(2) to single
     * precision, and that error is multiplied by m in the reconstruction. */
    static const float64_t CONST_LN2[2]  = {0.6931471805599453, 0.6931471805599453}; // ln(2)

    // Extract exponent: m = (bits >> 52) - 1023
    int64x2_t m = vsubq_s64(vreinterpretq_s64_u64(vshrq_n_u64(vreinterpretq_u64_f64(x), 52)), vld1q_s64(CONST_1023));

    // Remove m from the exponent field so that val = x / 2^m
    float64x2_t val = vreinterpretq_f64_s64(vsubq_s64(vreinterpretq_s64_f64(x), vshlq_n_s64(m, 52)));

    // Polynomial Approximation
    float64x2_t poly = vtaylor_polyq_f64(val, log_tab_64);

    // Reconstruct: log(x) = log(val) + m * ln(2)
    poly = vmlaq_f64(poly, vcvtq_f64_s64(m), vld1q_f64(CONST_LN2));

    return poly;
}
inline float32x4_t vtanhq_f32(float32x4_t val)
{
static const float32_t CONST_1[4] = {1.f,1.f,1.f,1.f};

@ -39,6 +39,18 @@ const float32_t exp_tab[4*8] =
0.000195780929062f,0.000195780929062f,0.000195780929062f,0.000195780929062f
};
/* Exponent polynomial coefficients for the float64 path.
 * Each row holds one coefficient duplicated for the two float64 lanes.
 * Rows are stored in the order vtaylor_polyq_f64 reads them, not by
 * ascending power; the power each row multiplies is noted per row.
 * NOTE(review): the literals carry an 'f' suffix, so the stored values are
 * single-precision roundings widened to float64. */
const float64_t exp_tab_64[2*8] =
{
1.f,1.f, /* row 0: x^0 */
0.0416598916054f,0.0416598916054f, /* row 1: x^4 */
0.500000596046f,0.500000596046f, /* row 2: x^2 */
0.0014122662833f,0.0014122662833f, /* row 3: x^6 */
1.00000011921f,1.00000011921f, /* row 4: x^1 */
0.00833693705499f,0.00833693705499f, /* row 5: x^5 */
0.166665703058f,0.166665703058f, /* row 6: x^3 */
0.000195780929062f,0.000195780929062f /* row 7: x^7 */
};
/** Logarithm polynomial coefficients */
const float32_t log_tab[4*8] =
{
@ -52,4 +64,16 @@ const float32_t log_tab[4*8] =
0.0141278216615f,0.0141278216615f,0.0141278216615f,0.0141278216615f
};
/* Logarithm polynomial coefficients for the float64 path.
 * Each row holds one coefficient duplicated for the two float64 lanes.
 * Rows are stored in the order vtaylor_polyq_f64 reads them, not by
 * ascending power; the power each row multiplies is noted per row.
 * NOTE(review): the literals carry an 'f' suffix, so the stored values are
 * single-precision roundings widened to float64. */
const float64_t log_tab_64[2*8] =
{
-2.29561495781f,-2.29561495781f, /* row 0: x^0 */
-2.47071170807f,-2.47071170807f, /* row 1: x^4 */
-5.68692588806f,-5.68692588806f, /* row 2: x^2 */
-0.165253549814f,-0.165253549814f, /* row 3: x^6 */
5.17591238022f,5.17591238022f, /* row 4: x^1 */
0.844007015228f,0.844007015228f, /* row 5: x^5 */
4.58445882797f,4.58445882797f, /* row 6: x^3 */
0.0141278216615f,0.0141278216615f /* row 7: x^7 */
};
#endif

@ -32,6 +32,7 @@
#include "arm_mat_cmplx_mult_f32.c"
#include "arm_mat_cmplx_mult_q15.c"
#include "arm_mat_cmplx_mult_q31.c"
#include "arm_mat_init_f64.c"
#include "arm_mat_init_f32.c"
#include "arm_mat_init_q15.c"
#include "arm_mat_init_q31.c"
@ -75,4 +76,4 @@
#include "arm_mat_qr_f32.c"
#include "arm_mat_qr_f64.c"
#include "arm_householder_f64.c"
#include "arm_householder_f32.c"
#include "arm_householder_f32.c"

@ -0,0 +1,76 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mat_init_f64.c
* Description: Floating-point matrix initialization
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/matrix_functions.h"
/**
@ingroup groupMatrix
*/
/**
@defgroup MatrixInit Matrix Initialization
Initializes the underlying matrix data structure.
The functions set the <code>numRows</code>,
<code>numCols</code>, and <code>pData</code> fields
of the matrix data structure.
*/
/**
@addtogroup MatrixInit
@{
*/
/**
  @brief         Initialize a float64_t matrix instance.
  @param[in,out] S         points to the matrix structure to fill in
  @param[in]     nRows     number of rows in the matrix
  @param[in]     nColumns  number of columns in the matrix
  @param[in]     pData     points to the matrix data array
  @return        none

  @par           Details
                   Only the three fields of <code>S</code> are written. The
                   <code>pData</code> pointer is stored as given; the data it
                   points to is neither read nor copied.
 */
void arm_mat_init_f64(
  arm_matrix_instance_f64 * S,
  uint16_t nRows,
  uint16_t nColumns,
  float64_t * pData)
{
  /* Attach the caller-provided storage */
  S->pData = pData;

  /* Record the matrix dimensions */
  S->numCols = nColumns;
  S->numRows = nRows;
}
/**
@} end of MatrixInit group
*/
Loading…
Cancel
Save