From f4a19826afbdf0f76d07cd497b06a2e2c7f5829b Mon Sep 17 00:00:00 2001
From: Silfurion <nathan@lesbottet.net>
Date: Thu, 11 Aug 2022 09:50:31 +0200
Subject: [PATCH] Updating missing file 1

---
 ComputeLibrary/Include/NEMath.h           | 99 +++++++++++++++++++++++
 ComputeLibrary/Source/arm_cl_tables.c     | 24 ++++++
 Source/MatrixFunctions/MatrixFunctions.c  |  3 +-
 Source/MatrixFunctions/arm_mat_init_f64.c | 76 +++++++++++++++++
 4 files changed, 201 insertions(+), 1 deletion(-)
 create mode 100644 Source/MatrixFunctions/arm_mat_init_f64.c

diff --git a/ComputeLibrary/Include/NEMath.h b/ComputeLibrary/Include/NEMath.h
index 29838bdf..30b9fc2e 100755
--- a/ComputeLibrary/Include/NEMath.h
+++ b/ComputeLibrary/Include/NEMath.h
@@ -26,6 +26,40 @@
 
 
 #if defined(ARM_MATH_NEON)
+
+
+/** Calculate the natural logarithm of each lane.
+ *
+ * @param[in] x Input vector value in F64 format.
+ *
+ * @return The calculated logarithm.
+ */
+static inline float64x2_t vlogq_f64(float64x2_t x);
+/** Calculate the exponential of each lane.
+ *
+ * @param[in] x Input vector value in F64 format.
+ *
+ * @return The calculated exponential.
+ */
+static inline float64x2_t vexpq_f64(float64x2_t x);
+
+/** Perform a 7th degree polynomial approximation using Estrin's method.
+ *
+ * @param[in] x      Input vector value in F64 format.
+ * @param[in] coeffs Polynomial coefficients table. (array of 8 flattened float64x2_t vectors)
+ *
+ * @return The calculated approximation.
+ */
+static inline float64x2_t vtaylor_polyq_f64(float64x2_t x, const float64_t *coeffs);
+
+/** Calculate the reciprocal of each lane.
+ *
+ * @param[in] x Input vector value in F64 format.
+ *
+ * @return The calculated reciprocal.
+ */
+static inline float64x2_t vinvq_f64(float64x2_t x);
+
 /** Calculate floor of a vector.
  *
  * @param[in] val Input vector value in F32 format.
@@ -182,10 +216,14 @@ static inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n);
 /** Exponent polynomial coefficients */
 extern const float32_t exp_tab[4*8];
 
+extern const float64_t exp_tab_64[2*8]; /**< Exponent polynomial coefficients for float64x2_t: 8 rows of 2 lanes */
+
 
 /** Logarithm polynomial coefficients */
 extern const float32_t log_tab[4*8];
 
+extern const float64_t log_tab_64[2*8]; /**< Logarithm polynomial coefficients for float64x2_t: 8 rows of 2 lanes */
+
 #ifndef DOXYGEN_SKIP_THIS
 inline float32x4_t vfloorq_f32(float32x4_t val)
 {
@@ -231,6 +269,14 @@ inline float32x4_t vinvq_f32(float32x4_t x)
     return recip;
 }
 
+inline float64x2_t vinvq_f64(float64x2_t x)
+{   /* Per-lane reciprocal 1/x: hardware estimate refined by Newton-Raphson (recip *= 2 - x*recip). */
+    float64x2_t recip = vrecpeq_f64(x);                          /* coarse estimate (about 8 bits per the Arm reference) */
+    recip             = vmulq_f64(vrecpsq_f64(x, recip), recip); /* refinement step 1 */
+    recip             = vmulq_f64(vrecpsq_f64(x, recip), recip); /* refinement step 2 */
+    return vmulq_f64(vrecpsq_f64(x, recip), recip);              /* step 3: two steps are not enough for a 53-bit significand */
+}
+
 inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)
 {
     float32x4_t A   = vmlaq_f32(vld1q_f32(&coeffs[4*0]), vld1q_f32(&coeffs[4*4]), x);
@@ -243,6 +289,18 @@ inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)
     return res;
 }
 
+inline float64x2_t vtaylor_polyq_f64(float64x2_t x, const float64_t *coeffs)
+{   /* Degree-7 polynomial, Estrin scheme; coeffs holds 8 rows of 2 lanes, row r starts at coeffs[2*r]. */
+    const float64x2_t p01    = vmlaq_f64(vld1q_f64(coeffs + 0), vld1q_f64(coeffs +  8), x); /* row0 + row4*x */
+    const float64x2_t p23    = vmlaq_f64(vld1q_f64(coeffs + 4), vld1q_f64(coeffs + 12), x); /* row2 + row6*x */
+    const float64x2_t p45    = vmlaq_f64(vld1q_f64(coeffs + 2), vld1q_f64(coeffs + 10), x); /* row1 + row5*x */
+    const float64x2_t p67    = vmlaq_f64(vld1q_f64(coeffs + 6), vld1q_f64(coeffs + 14), x); /* row3 + row7*x */
+    const float64x2_t x_sq   = vmulq_f64(x, x);                                             /* x^2 */
+    const float64x2_t x_quad = vmulq_f64(x_sq, x_sq);                                       /* x^4 */
+    const float64x2_t low    = vmlaq_f64(p01, p23, x_sq);                                   /* terms x^0..x^3 */
+    return vmlaq_f64(low, vmlaq_f64(p45, p67, x_sq), x_quad);                               /* low + (terms x^4..x^7) */
+}
+
 inline float32x4_t vexpq_f32(float32x4_t x)
 {
     static const float32_t CONST_LN2[4]          = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)
@@ -261,6 +319,28 @@ inline float32x4_t vexpq_f32(float32x4_t x)
     poly = vreinterpretq_f32_s32(vqaddq_s32(vreinterpretq_s32_f32(poly), vqshlq_n_s32(m, 23)));
     poly = vbslq_f32(vcltq_s32(m, vld1q_s32(CONST_NEGATIVE_126)), vld1q_f32(CONST_0), poly);
 
+  
+    return poly;
+}
+
+inline float64x2_t vexpq_f64(float64x2_t x)
+{
+    /* Per-lane exp(x): split x = m*ln(2) + val, approximate exp(val) by polynomial, then scale by 2^m. */
+    static const float64_t CONST_LN2[2]          = {0.6931471805599453, 0.6931471805599453}; // ln(2) in double precision (no f suffix)
+    static const float64_t CONST_INV_LN2[2]      = {1.4426950408889634, 1.4426950408889634}; // 1/ln(2) in double precision (no f suffix)
+    static const float64_t CONST_0[2]            = {0.0, 0.0};
+    static const int64_t   CONST_NEGATIVE_1022[2] = {-1022,-1022}; // lanes with m below this are forced to 0
+
+    // Range reduction: m = x/ln(2) converted to integer, val = x - m*ln(2), intended range [-log(2),log(2)]
+    int64x2_t   m   = vcvtq_s64_f64(vmulq_f64(x, vld1q_f64(CONST_INV_LN2)));
+    float64x2_t val = vmlsq_f64(x, vcvtq_f64_s64(m), vld1q_f64(CONST_LN2));
+
+    // Polynomial Approximation
+    float64x2_t poly = vtaylor_polyq_f64(val, exp_tab_64);
+
+    // Reconstruct: add m at bit 52 (the double exponent field), i.e. multiply by 2^m
+    poly = vreinterpretq_f64_s64(vqaddq_s64(vreinterpretq_s64_f64(poly), vqshlq_n_s64(m,52)));
+    poly = vbslq_f64(vcltq_s64(m, vld1q_s64(CONST_NEGATIVE_1022)), vld1q_f64(CONST_0), poly); // select 0 where m < -1022
     return poly;
 }
 
@@ -282,6 +362,25 @@ inline float32x4_t vlogq_f32(float32x4_t x)
     return poly;
 }
 
+inline float64x2_t vlogq_f64(float64x2_t x)
+{
+    static const int64_t   CONST_1023[2] = {1023,1023};           // 1023, subtracted from the raw exponent field
+    static const float64_t CONST_LN2[2] = {0.6931471805599453, 0.6931471805599453}; // ln(2) in double precision (no f suffix)
+    /* Per-lane natural log: write x = 2^m * val, then log(x) = poly(val) + m*ln(2). */
+    // Extract exponent: m = (bits >> 52) - 1023, val = x with m removed from its exponent field
+    int64x2_t   m   = vsubq_s64(vreinterpretq_s64_u64(vshrq_n_u64(vreinterpretq_u64_f64(x), 52)), vld1q_s64(CONST_1023));
+    float64x2_t val = vreinterpretq_f64_s64(vsubq_s64(vreinterpretq_s64_f64(x), vshlq_n_s64(m, 52)));
+
+    // Polynomial Approximation
+    float64x2_t poly = vtaylor_polyq_f64(val, log_tab_64);
+
+    // Reconstruct: poly + m * ln(2)
+    poly = vmlaq_f64(poly, vcvtq_f64_s64(m), vld1q_f64(CONST_LN2));
+
+    return poly;
+}
+
+
 inline float32x4_t vtanhq_f32(float32x4_t val)
 {
     static const float32_t CONST_1[4]        = {1.f,1.f,1.f,1.f};
diff --git a/ComputeLibrary/Source/arm_cl_tables.c b/ComputeLibrary/Source/arm_cl_tables.c
index bdd39a6b..3f1456d4 100755
--- a/ComputeLibrary/Source/arm_cl_tables.c
+++ b/ComputeLibrary/Source/arm_cl_tables.c
@@ -39,6 +39,18 @@ const float32_t exp_tab[4*8] =
         0.000195780929062f,0.000195780929062f,0.000195780929062f,0.000195780929062f
 };
 
+const float64_t exp_tab_64[2*8] = /* Exponent polynomial coefficients used by vexpq_f64; each row is one coefficient repeated for both lanes */
+{       /* Literals keep the f suffix, so each value has float precision before widening; rows follow the vtaylor_polyq_f64 load order */
+        1.f,1.f,                                 /* x^0 */
+        0.0416598916054f,0.0416598916054f,       /* x^4 */
+        0.500000596046f,0.500000596046f,         /* x^2 */
+        0.0014122662833f,0.0014122662833f,       /* x^6 */
+        1.00000011921f,1.00000011921f,           /* x^1 */
+        0.00833693705499f,0.00833693705499f,     /* x^5 */
+        0.166665703058f,0.166665703058f,         /* x^3 */
+        0.000195780929062f,0.000195780929062f    /* x^7 */
+};
+
 /** Logarithm polynomial coefficients */
 const float32_t log_tab[4*8] =
 {
@@ -52,4 +64,16 @@ const float32_t log_tab[4*8] =
         0.0141278216615f,0.0141278216615f,0.0141278216615f,0.0141278216615f
 };
 
+const float64_t log_tab_64[2*8] = /* Logarithm polynomial coefficients used by vlogq_f64; each row is one coefficient repeated for both lanes */
+{       /* Literals keep the f suffix, so each value has float precision before widening; rows follow the vtaylor_polyq_f64 load order */
+        -2.29561495781f,-2.29561495781f,         /* x^0 */
+        -2.47071170807f,-2.47071170807f,         /* x^4 */
+        -5.68692588806f,-5.68692588806f,         /* x^2 */
+        -0.165253549814f,-0.165253549814f,       /* x^6 */
+        5.17591238022f,5.17591238022f,           /* x^1 */
+        0.844007015228f,0.844007015228f,         /* x^5 */
+        4.58445882797f,4.58445882797f,           /* x^3 */
+        0.0141278216615f,0.0141278216615f        /* x^7 */
+};
+
 #endif
diff --git a/Source/MatrixFunctions/MatrixFunctions.c b/Source/MatrixFunctions/MatrixFunctions.c
index 7d7302be..9466a1b0 100644
--- a/Source/MatrixFunctions/MatrixFunctions.c
+++ b/Source/MatrixFunctions/MatrixFunctions.c
@@ -32,6 +32,7 @@
 #include "arm_mat_cmplx_mult_f32.c"
 #include "arm_mat_cmplx_mult_q15.c"
 #include "arm_mat_cmplx_mult_q31.c"
+#include "arm_mat_init_f64.c"
 #include "arm_mat_init_f32.c"
 #include "arm_mat_init_q15.c"
 #include "arm_mat_init_q31.c"
@@ -75,4 +76,4 @@
 #include "arm_mat_qr_f32.c"
 #include "arm_mat_qr_f64.c"
 #include "arm_householder_f64.c"
-#include "arm_householder_f32.c"
\ No newline at end of file
+#include "arm_householder_f32.c"
diff --git a/Source/MatrixFunctions/arm_mat_init_f64.c b/Source/MatrixFunctions/arm_mat_init_f64.c
new file mode 100644
index 00000000..5abe302b
--- /dev/null
+++ b/Source/MatrixFunctions/arm_mat_init_f64.c
@@ -0,0 +1,76 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_mat_init_f64.c
+ * Description:  Floating-point matrix initialization
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/matrix_functions.h"
+
+/**
+  @ingroup groupMatrix
+ */
+
+/**
+  @defgroup MatrixInit Matrix Initialization
+ 
+  Initializes the underlying matrix data structure.
+  The functions set the <code>numRows</code>,
+  <code>numCols</code>, and <code>pData</code> fields
+  of the matrix data structure.
+ */
+
+/**
+  @addtogroup MatrixInit
+  @{
+ */
+
+/**
+  @brief         Initialize a float64_t matrix instance.
+  @param[in,out] S         matrix instance whose fields are filled in
+  @param[in]     nRows     row count, stored in S->numRows
+  @param[in]     nColumns  column count, stored in S->numCols
+  @param[in]     pData     matrix data array, stored in S->pData (pointer only, nothing is copied)
+  @return        none
+ */
+
+void arm_mat_init_f64(
+  arm_matrix_instance_f64 * S,
+  uint16_t nRows,
+  uint16_t nColumns,
+  float64_t * pData)
+{
+  /* Data pointer: only the address is recorded */
+  S->pData = pData;
+
+  /* Column count */
+  S->numCols = nColumns;
+
+  /* Row count */
+  S->numRows = nRows;
+}
+
+/**
+  @} end of MatrixInit group
+ */