diff --git a/ComputeLibrary/Include/NEMath.h b/ComputeLibrary/Include/NEMath.h
index 13ae8c4d..1eae4a0e 100755
--- a/ComputeLibrary/Include/NEMath.h
+++ b/ComputeLibrary/Include/NEMath.h
@@ -69,11 +69,11 @@ static inline float32x4_t vinvq_f32(float32x4_t x);
/** Perform a 7th degree polynomial approximation using Estrin's method.
*
* @param[in] x Input vector value in F32 format.
- * @param[in] coeffs Polynomial coefficients table.
+ * @param[in] coeffs Polynomial coefficients table. (array of flattened float32x4_t vectors)
*
* @return The calculated approximation.
*/
-static inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32x4_t *coeffs);
+static inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs);
/** Calculate exponential
*
@@ -180,21 +180,21 @@ static inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n);
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
/** Exponent polynomial coefficients */
-extern const float32x4_t exp_tab[8];
+extern const float32_t exp_tab[4*8];
/** Logarithm polynomial coefficients */
-extern const float32x4_t log_tab[8];
+extern const float32_t log_tab[4*8];
#ifndef DOXYGEN_SKIP_THIS
inline float32x4_t vfloorq_f32(float32x4_t val)
{
- static const float32x4_t CONST_1 = {1.f,1.f,1.f,1.f};
+ static const float32_t CONST_1[4] = {1.f,1.f,1.f,1.f};
const int32x4_t z = vcvtq_s32_f32(val);
const float32x4_t r = vcvtq_f32_s32(z);
- return vbslq_f32(vcgtq_f32(r, val), vsubq_f32(r, CONST_1), r);
+ return vbslq_f32(vcgtq_f32(r, val), vsubq_f32(r, vld1q_f32(CONST_1)), r);
}
inline float32x2_t vinvsqrt_f32(float32x2_t x)
@@ -231,12 +231,12 @@ inline float32x4_t vinvq_f32(float32x4_t x)
return recip;
}
-inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32x4_t *coeffs)
+inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)
{
- float32x4_t A = vmlaq_f32(coeffs[0], coeffs[4], x);
- float32x4_t B = vmlaq_f32(coeffs[2], coeffs[6], x);
- float32x4_t C = vmlaq_f32(coeffs[1], coeffs[5], x);
- float32x4_t D = vmlaq_f32(coeffs[3], coeffs[7], x);
+ float32x4_t A = vmlaq_f32(vld1q_f32(&coeffs[4*0]), vld1q_f32(&coeffs[4*4]), x);
+ float32x4_t B = vmlaq_f32(vld1q_f32(&coeffs[4*2]), vld1q_f32(&coeffs[4*6]), x);
+ float32x4_t C = vmlaq_f32(vld1q_f32(&coeffs[4*1]), vld1q_f32(&coeffs[4*5]), x);
+ float32x4_t D = vmlaq_f32(vld1q_f32(&coeffs[4*3]), vld1q_f32(&coeffs[4*7]), x);
float32x4_t x2 = vmulq_f32(x, x);
float32x4_t x4 = vmulq_f32(x2, x2);
float32x4_t res = vmlaq_f32(vmlaq_f32(A, B, x2), vmlaq_f32(C, D, x2), x4);
@@ -245,54 +245,54 @@ inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32x4_t *coeffs)
inline float32x4_t vexpq_f32(float32x4_t x)
{
- static const float32x4_t CONST_LN2 = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)
- static const float32x4_t CONST_INV_LN2 = {1.4426950408f,1.4426950408f,1.4426950408f,1.4426950408f}; // 1/ln(2)
- static const float32x4_t CONST_0 = {0.f,0.f,0.f,0.f};
- static const int32x4_t CONST_NEGATIVE_126 = {-126,-126,-126,-126};
+ static const float32_t CONST_LN2[4] = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)
+ static const float32_t CONST_INV_LN2[4] = {1.4426950408f,1.4426950408f,1.4426950408f,1.4426950408f}; // 1/ln(2)
+ static const float32_t CONST_0[4] = {0.f,0.f,0.f,0.f};
+ static const int32_t CONST_NEGATIVE_126[4] = {-126,-126,-126,-126};
// Perform range reduction [-log(2),log(2)]
- int32x4_t m = vcvtq_s32_f32(vmulq_f32(x, CONST_INV_LN2));
- float32x4_t val = vmlsq_f32(x, vcvtq_f32_s32(m), CONST_LN2);
+ int32x4_t m = vcvtq_s32_f32(vmulq_f32(x, vld1q_f32(CONST_INV_LN2)));
+ float32x4_t val = vmlsq_f32(x, vcvtq_f32_s32(m), vld1q_f32(CONST_LN2));
// Polynomial Approximation
float32x4_t poly = vtaylor_polyq_f32(val, exp_tab);
// Reconstruct
poly = vreinterpretq_f32_s32(vqaddq_s32(vreinterpretq_s32_f32(poly), vqshlq_n_s32(m, 23)));
- poly = vbslq_f32(vcltq_s32(m, CONST_NEGATIVE_126), CONST_0, poly);
+ poly = vbslq_f32(vcltq_s32(m, vld1q_s32(CONST_NEGATIVE_126)), vld1q_f32(CONST_0), poly);
return poly;
}
inline float32x4_t vlogq_f32(float32x4_t x)
{
- static const int32x4_t CONST_127 = {127,127,127,127}; // 127
- static const float32x4_t CONST_LN2 = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)
+ static const int32_t CONST_127[4] = {127,127,127,127}; // 127
+ static const float32_t CONST_LN2[4] = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)
// Extract exponent
- int32x4_t m = vsubq_s32(vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23)), CONST_127);
+ int32x4_t m = vsubq_s32(vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23)), vld1q_s32(CONST_127));
float32x4_t val = vreinterpretq_f32_s32(vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23)));
// Polynomial Approximation
float32x4_t poly = vtaylor_polyq_f32(val, log_tab);
// Reconstruct
- poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LN2);
+ poly = vmlaq_f32(poly, vcvtq_f32_s32(m), vld1q_f32(CONST_LN2));
return poly;
}
inline float32x4_t vtanhq_f32(float32x4_t val)
{
- static const float32x4_t CONST_1 = {1.f,1.f,1.f,1.f};
- static const float32x4_t CONST_2 = {2.f,2.f,2.f,2.f};
- static const float32x4_t CONST_MIN_TANH = {-10.f,-10.f,-10.f,-10.f};
- static const float32x4_t CONST_MAX_TANH = {10.f,10.f,10.f,10.f};
-
- float32x4_t x = vminq_f32(vmaxq_f32(val, CONST_MIN_TANH), CONST_MAX_TANH);
- float32x4_t exp2x = vexpq_f32(vmulq_f32(CONST_2, x));
- float32x4_t num = vsubq_f32(exp2x, CONST_1);
- float32x4_t den = vaddq_f32(exp2x, CONST_1);
+ static const float32_t CONST_1[4] = {1.f,1.f,1.f,1.f};
+ static const float32_t CONST_2[4] = {2.f,2.f,2.f,2.f};
+ static const float32_t CONST_MIN_TANH[4] = {-10.f,-10.f,-10.f,-10.f};
+ static const float32_t CONST_MAX_TANH[4] = {10.f,10.f,10.f,10.f};
+
+ float32x4_t x = vminq_f32(vmaxq_f32(val, vld1q_f32(CONST_MIN_TANH)), vld1q_f32(CONST_MAX_TANH));
+ float32x4_t exp2x = vexpq_f32(vmulq_f32(vld1q_f32(CONST_2), x));
+ float32x4_t num = vsubq_f32(exp2x, vld1q_f32(CONST_1));
+ float32x4_t den = vaddq_f32(exp2x, vld1q_f32(CONST_1));
float32x4_t tanh = vmulq_f32(num, vinvq_f32(den));
return tanh;
}
@@ -309,12 +309,12 @@ inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
#ifndef DOXYGEN_SKIP_THIS
inline float16x8_t vfloorq_f16(float16x8_t val)
{
- static const float16x8_t CONST_1 = {1.f,1.f,1.f,1.f};
+ static const float16_t CONST_1[8] = {1.f,1.f,1.f,1.f,1.f,1.f,1.f,1.f};
const int16x8_t z = vcvtq_s16_f16(val);
const float16x8_t r = vcvtq_f16_s16(z);
- return vbslq_f16(vcgtq_f16(r, val), vsubq_f16(r, CONST_1), r);
+ return vbslq_f16(vcgtq_f16(r, val), vsubq_f16(r, vld1q_f16(CONST_1)), r);
}
inline float16x4_t vinvsqrt_f16(float16x4_t x)
{
@@ -350,15 +350,15 @@ inline float16x8_t vinvq_f16(float16x8_t x)
inline float16x8_t vtanhq_f16(float16x8_t val)
{
- const float16x8_t CONST_1 = {1.f,1.f,1.f,1.f};
- const float16x8_t CONST_2 = {2.f,2.f,2.f,2.f};
- const float16x8_t CONST_MIN_TANH = {-10.f,-10.f,-10.f,-10.f};
- const float16x8_t CONST_MAX_TANH = {10.f,10.f,10.f,10.f};
-
- const float16x8_t x = vminq_f16(vmaxq_f16(val, CONST_MIN_TANH), CONST_MAX_TANH);
- const float16x8_t exp2x = vexpq_f16(vmulq_f16(CONST_2, x));
- const float16x8_t num = vsubq_f16(exp2x, CONST_1);
- const float16x8_t den = vaddq_f16(exp2x, CONST_1);
+ const float16_t CONST_1[8] = {1.f,1.f,1.f,1.f,1.f,1.f,1.f,1.f};
+ const float16_t CONST_2[8] = {2.f,2.f,2.f,2.f,2.f,2.f,2.f,2.f};
+ const float16_t CONST_MIN_TANH[8] = {-10.f,-10.f,-10.f,-10.f,-10.f,-10.f,-10.f,-10.f};
+ const float16_t CONST_MAX_TANH[8] = {10.f,10.f,10.f,10.f,10.f,10.f,10.f,10.f};
+
+ const float16x8_t x = vminq_f16(vmaxq_f16(val, vld1q_f16(CONST_MIN_TANH)), vld1q_f16(CONST_MAX_TANH));
+ const float16x8_t exp2x = vexpq_f16(vmulq_f16(vld1q_f16(CONST_2), x));
+ const float16x8_t num = vsubq_f16(exp2x, vld1q_f16(CONST_1));
+ const float16x8_t den = vaddq_f16(exp2x, vld1q_f16(CONST_1));
const float16x8_t tanh = vmulq_f16(num, vinvq_f16(den));
return tanh;
}
@@ -411,4 +411,4 @@ inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n)
#endif /* DOXYGEN_SKIP_THIS */
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
#endif
-#endif /* __ARM_COMPUTE_NEMATH_H__ */
\ No newline at end of file
+#endif /* __ARM_COMPUTE_NEMATH_H__ */
diff --git a/ComputeLibrary/Source/arm_cl_tables.c b/ComputeLibrary/Source/arm_cl_tables.c
index 53d548b4..bdd39a6b 100755
--- a/ComputeLibrary/Source/arm_cl_tables.c
+++ b/ComputeLibrary/Source/arm_cl_tables.c
@@ -27,29 +27,29 @@
#if defined(ARM_MATH_NEON)
/** Exponent polynomial coefficients */
-const float32x4_t exp_tab[8] =
+const float32_t exp_tab[4*8] =
{
- {1.f,1.f,1.f,1.f},
- {0.0416598916054f,0.0416598916054f,0.0416598916054f,0.0416598916054f},
- {0.500000596046f,0.500000596046f,0.500000596046f,0.500000596046f},
- {0.0014122662833f,0.0014122662833f,0.0014122662833f,0.0014122662833f},
- {1.00000011921f,1.00000011921f,1.00000011921f,1.00000011921f},
- {0.00833693705499f,0.00833693705499f,0.00833693705499f,0.00833693705499f},
- {0.166665703058f,0.166665703058f,0.166665703058f,0.166665703058f},
- {0.000195780929062f,0.000195780929062f,0.000195780929062f,0.000195780929062f}
+ 1.f,1.f,1.f,1.f,
+ 0.0416598916054f,0.0416598916054f,0.0416598916054f,0.0416598916054f,
+ 0.500000596046f,0.500000596046f,0.500000596046f,0.500000596046f,
+ 0.0014122662833f,0.0014122662833f,0.0014122662833f,0.0014122662833f,
+ 1.00000011921f,1.00000011921f,1.00000011921f,1.00000011921f,
+ 0.00833693705499f,0.00833693705499f,0.00833693705499f,0.00833693705499f,
+ 0.166665703058f,0.166665703058f,0.166665703058f,0.166665703058f,
+ 0.000195780929062f,0.000195780929062f,0.000195780929062f,0.000195780929062f
};
/** Logarithm polynomial coefficients */
-const float32x4_t log_tab[8] =
+const float32_t log_tab[4*8] =
{
- {-2.29561495781f,-2.29561495781f,-2.29561495781f,-2.29561495781f},
- {-2.47071170807f,-2.47071170807f,-2.47071170807f,-2.47071170807f},
- {-5.68692588806f,-5.68692588806f,-5.68692588806f,-5.68692588806f},
- {-0.165253549814f,-0.165253549814f,-0.165253549814f,-0.165253549814f},
- {5.17591238022f,5.17591238022f,5.17591238022f,5.17591238022f},
- {0.844007015228f,0.844007015228f,0.844007015228f,0.844007015228f},
- {4.58445882797f,4.58445882797f,4.58445882797f,4.58445882797f},
- {0.0141278216615f,0.0141278216615f,0.0141278216615f,0.0141278216615f},
+ -2.29561495781f,-2.29561495781f,-2.29561495781f,-2.29561495781f,
+ -2.47071170807f,-2.47071170807f,-2.47071170807f,-2.47071170807f,
+ -5.68692588806f,-5.68692588806f,-5.68692588806f,-5.68692588806f,
+ -0.165253549814f,-0.165253549814f,-0.165253549814f,-0.165253549814f,
+ 5.17591238022f,5.17591238022f,5.17591238022f,5.17591238022f,
+ 0.844007015228f,0.844007015228f,0.844007015228f,0.844007015228f,
+ 4.58445882797f,4.58445882797f,4.58445882797f,4.58445882797f,
+ 0.0141278216615f,0.0141278216615f,0.0141278216615f,0.0141278216615f
};
-#endif
\ No newline at end of file
+#endif
diff --git a/Include/arm_math.h b/Include/arm_math.h
index 50fa170a..ffd79a9e 100644
--- a/Include/arm_math.h
+++ b/Include/arm_math.h
@@ -93,8 +93,10 @@
* Toolchain Support
* ------------
*
- * The library has been developed and tested with MDK version 5.14.0.0
- * The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly.
+ * The library is now tested on Fast Models building with cmake.
+ * Core M0, M7, A5 are tested.
+ *
+ *
*
* Building the Library
* ------------
@@ -140,6 +142,23 @@
* of some DSP functions. Experimental Neon versions currently do not have better
* performances than the scalar versions.
*
+ * - ARM_MATH_HELIUM:
+ *
+ * It implies the flags ARM_MATH_MVEF and ARM_MATH_MVEI and ARM_MATH_FLOAT16.
+ *
+ * - ARM_MATH_MVEF:
+ *
+ * Select Helium versions of the f32 algorithms.
+ * It implies ARM_MATH_FLOAT16 and ARM_MATH_MVEI.
+ *
+ * - ARM_MATH_MVEI:
+ *
+ * Select Helium versions of the int and fixed point algorithms.
+ *
+ * - ARM_MATH_FLOAT16:
+ *
+ * Float16 implementations of some algorithms (Requires MVE extension).
+ *
*
* CMSIS-DSP in ARM::CMSIS Pack
* -----------------------------
diff --git a/Platforms/FVP/ARMCA5/LinkScripts/GCC/mem_ARMCA5.h b/Platforms/FVP/ARMCA5/LinkScripts/GCC/mem_ARMCA5.h
index 8feba2ba..44a1b313 100644
--- a/Platforms/FVP/ARMCA5/LinkScripts/GCC/mem_ARMCA5.h
+++ b/Platforms/FVP/ARMCA5/LinkScripts/GCC/mem_ARMCA5.h
@@ -44,7 +44,7 @@
//
*----------------------------------------------------------------------------*/
#define __ROM_BASE 0x80000000
-#define __ROM_SIZE 0x00200000
+#define __ROM_SIZE 0x00400000
/*--------------------- RAM Configuration -----------------------------------
// RAM Configuration
@@ -68,7 +68,7 @@
//
//
*----------------------------------------------------------------------------*/
-#define __RAM_BASE 0x80200000
+#define __RAM_BASE 0x80400000
#define __RAM_SIZE 0x00300000
#define __RW_DATA_SIZE 0x00100000
@@ -94,7 +94,7 @@
// TTB Size (in Bytes) <0x0-0xFFFFFFFF:8>
//
*----------------------------------------------------------------------------*/
-#define __TTB_BASE 0x80500000
+#define __TTB_BASE 0x80800000
#define __TTB_SIZE 0x00005000
#endif /* __MEM_ARMCA5_H */
diff --git a/Platforms/FVP/ARMCA5/Startup/AC5/startup_ARMCA5.c b/Platforms/FVP/ARMCA5/Startup/AC5/startup_ARMCA5.c
index 535a2005..84a122b4 100755
--- a/Platforms/FVP/ARMCA5/Startup/AC5/startup_ARMCA5.c
+++ b/Platforms/FVP/ARMCA5/Startup/AC5/startup_ARMCA5.c
@@ -41,8 +41,8 @@
/*----------------------------------------------------------------------------
Internal References
*----------------------------------------------------------------------------*/
-void Vectors (void) __attribute__ ((naked, section("RESET")));
-void Reset_Handler (void) __attribute__ ((naked));
+void Vectors (void) __attribute__ ((section("RESET")));
+void Reset_Handler (void);
/*----------------------------------------------------------------------------
Exception / Interrupt Handler
@@ -59,14 +59,14 @@ void FIQ_Handler (void) __attribute__ ((weak, alias("Default_Handler")));
*----------------------------------------------------------------------------*/
void Vectors(void) {
__ASM volatile(
- "LDR PC, =Reset_Handler \n"
- "LDR PC, =Undef_Handler \n"
- "LDR PC, =SVC_Handler \n"
- "LDR PC, =PAbt_Handler \n"
- "LDR PC, =DAbt_Handler \n"
+ "LDR __current_pc, =Reset_Handler \n"
+ "LDR __current_pc, =Undef_Handler \n"
+ "LDR __current_pc, =SVC_Handler \n"
+ "LDR __current_pc, =PAbt_Handler \n"
+ "LDR __current_pc, =DAbt_Handler \n"
"NOP \n"
- "LDR PC, =IRQ_Handler \n"
- "LDR PC, =FIQ_Handler \n"
+ "LDR __current_pc, =IRQ_Handler \n"
+ "LDR __current_pc, =FIQ_Handler \n"
);
}
diff --git a/Platforms/FVP/ARMCM0/LinkScripts/AC5/lnk.sct b/Platforms/FVP/ARMCM0/LinkScripts/AC5/lnk.sct
index 37072231..18ee8b29 100755
--- a/Platforms/FVP/ARMCM0/LinkScripts/AC5/lnk.sct
+++ b/Platforms/FVP/ARMCM0/LinkScripts/AC5/lnk.sct
@@ -14,7 +14,7 @@
;
*----------------------------------------------------------------------------*/
#define __ROM_BASE 0x00000000
-#define __ROM_SIZE 0x00200000
+#define __ROM_SIZE 0x00300000
/*--------------------- Embedded RAM Configuration ---------------------------
; RAM Configuration
diff --git a/Platforms/FVP/ARMCM0/LinkScripts/GCC/lnk.ld b/Platforms/FVP/ARMCM0/LinkScripts/GCC/lnk.ld
new file mode 100755
index 00000000..d2485ad7
--- /dev/null
+++ b/Platforms/FVP/ARMCM0/LinkScripts/GCC/lnk.ld
@@ -0,0 +1,296 @@
+/******************************************************************************
+ * @file gcc_arm.ld
+ * @brief GNU Linker Script for Cortex-M based device
+ * @version V2.0.0
+ * @date 21. May 2019
+ ******************************************************************************/
+/*
+ * Copyright (c) 2009-2019 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "mem_ARMCM0.h"
+/*
+ *-------- <<< Use Configuration Wizard in Context Menu >>> -------------------
+ */
+
+/*---------------------- Flash Configuration ----------------------------------
+ Flash Configuration
+ Flash Base Address <0x0-0xFFFFFFFF:8>
+ Flash Size (in Bytes) <0x0-0xFFFFFFFF:8>
+
+ -----------------------------------------------------------------------------*/
+__ROM_BASE = 0x00000000;
+__ROM_SIZE = 0x00400000;
+
+/*--------------------- Embedded RAM Configuration ----------------------------
+ RAM Configuration
+ RAM Base Address <0x0-0xFFFFFFFF:8>
+ RAM Size (in Bytes) <0x0-0xFFFFFFFF:8>
+
+ -----------------------------------------------------------------------------*/
+__RAM_BASE = 0x20000000;
+__RAM_SIZE = 0x00300000;
+
+/*--------------------- Stack / Heap Configuration ----------------------------
+ Stack / Heap Configuration
+ Stack Size (in Bytes) <0x0-0xFFFFFFFF:8>
+ Heap Size (in Bytes) <0x0-0xFFFFFFFF:8>
+
+ -----------------------------------------------------------------------------*/
+__STACK_SIZE = STACK_SIZE;
+__HEAP_SIZE = HEAP_SIZE;
+
+/*
+ *-------------------- <<< end of configuration section >>> -------------------
+ */
+
+MEMORY
+{
+ FLASH (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE
+ RAM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE
+}
+
+/* Linker script to place sections and symbol values. Should be used together
+ * with other linker script that defines memory regions FLASH and RAM.
+ * It references following symbols, which must be defined in code:
+ * Reset_Handler : Entry of reset handler
+ *
+ * It defines following symbols, which code can use without definition:
+ * __exidx_start
+ * __exidx_end
+ * __copy_table_start__
+ * __copy_table_end__
+ * __zero_table_start__
+ * __zero_table_end__
+ * __etext
+ * __data_start__
+ * __preinit_array_start
+ * __preinit_array_end
+ * __init_array_start
+ * __init_array_end
+ * __fini_array_start
+ * __fini_array_end
+ * __data_end__
+ * __bss_start__
+ * __bss_end__
+ * __end__
+ * end
+ * __HeapLimit
+ * __StackLimit
+ * __StackTop
+ * __stack
+ */
+ENTRY(Reset_Handler)
+
+SECTIONS
+{
+ .text :
+ {
+ KEEP(*(.vectors))
+ *(.text*)
+
+ KEEP(*(.init))
+ KEEP(*(.fini))
+
+ /* .ctors */
+ *crtbegin.o(.ctors)
+ *crtbegin?.o(.ctors)
+ *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
+ *(SORT(.ctors.*))
+ *(.ctors)
+
+ /* .dtors */
+ *crtbegin.o(.dtors)
+ *crtbegin?.o(.dtors)
+ *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
+ *(SORT(.dtors.*))
+ *(.dtors)
+
+ *(.rodata*)
+
+ KEEP(*(.eh_frame*))
+ } > FLASH
+
+ /*
+ * SG veneers:
+ * All SG veneers are placed in the special output section .gnu.sgstubs. Its start address
+ * must be set, either with the command line option ‘--section-start’ or in a linker script,
+ * to indicate where to place these veneers in memory.
+ */
+/*
+ .gnu.sgstubs :
+ {
+ . = ALIGN(32);
+ } > FLASH
+*/
+ .ARM.extab :
+ {
+ *(.ARM.extab* .gnu.linkonce.armextab.*)
+ } > FLASH
+
+ __exidx_start = .;
+ .ARM.exidx :
+ {
+ *(.ARM.exidx* .gnu.linkonce.armexidx.*)
+ } > FLASH
+ __exidx_end = .;
+
+ .copy.table :
+ {
+ . = ALIGN(4);
+ __copy_table_start__ = .;
+ LONG (__etext)
+ LONG (__data_start__)
+ LONG (__data_end__ - __data_start__)
+ /* Add each additional data section here */
+/*
+ LONG (__etext2)
+ LONG (__data2_start__)
+ LONG (__data2_end__ - __data2_start__)
+*/
+ __copy_table_end__ = .;
+ } > FLASH
+
+ .zero.table :
+ {
+ . = ALIGN(4);
+ __zero_table_start__ = .;
+ /* Add each additional bss section here */
+/*
+ LONG (__bss2_start__)
+ LONG (__bss2_end__ - __bss2_start__)
+*/
+ __zero_table_end__ = .;
+ } > FLASH
+
+ /**
+ * Location counter can end up 2byte aligned with narrow Thumb code but
+ * __etext is assumed by startup code to be the LMA of a section in RAM
+ * which must be 4byte aligned
+ */
+ __etext = ALIGN (4);
+
+ .data : AT (__etext)
+ {
+ __data_start__ = .;
+ *(vtable)
+ *(.data)
+ *(.data.*)
+
+ . = ALIGN(4);
+ /* preinit data */
+ PROVIDE_HIDDEN (__preinit_array_start = .);
+ KEEP(*(.preinit_array))
+ PROVIDE_HIDDEN (__preinit_array_end = .);
+
+ . = ALIGN(4);
+ /* init data */
+ PROVIDE_HIDDEN (__init_array_start = .);
+ KEEP(*(SORT(.init_array.*)))
+ KEEP(*(.init_array))
+ PROVIDE_HIDDEN (__init_array_end = .);
+
+
+ . = ALIGN(4);
+ /* finit data */
+ PROVIDE_HIDDEN (__fini_array_start = .);
+ KEEP(*(SORT(.fini_array.*)))
+ KEEP(*(.fini_array))
+ PROVIDE_HIDDEN (__fini_array_end = .);
+
+ KEEP(*(.jcr*))
+ . = ALIGN(4);
+ /* All data end */
+ __data_end__ = .;
+
+ } > RAM
+
+ /*
+ * Secondary data section, optional
+ *
+ * Remember to add each additional data section
+ * to the .copy.table above to asure proper
+ * initialization during startup.
+ */
+/*
+ __etext2 = ALIGN (4);
+
+ .data2 : AT (__etext2)
+ {
+ . = ALIGN(4);
+ __data2_start__ = .;
+ *(.data2)
+ *(.data2.*)
+ . = ALIGN(4);
+ __data2_end__ = .;
+
+ } > RAM2
+*/
+
+ .bss :
+ {
+ . = ALIGN(4);
+ __bss_start__ = .;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN(4);
+ __bss_end__ = .;
+ } > RAM AT > RAM
+
+ /*
+ * Secondary bss section, optional
+ *
+ * Remember to add each additional bss section
+ * to the .zero.table above to asure proper
+ * initialization during startup.
+ */
+/*
+ .bss2 :
+ {
+ . = ALIGN(4);
+ __bss2_start__ = .;
+ *(.bss2)
+ *(.bss2.*)
+ . = ALIGN(4);
+ __bss2_end__ = .;
+ } > RAM2 AT > RAM2
+*/
+
+ .heap (COPY) :
+ {
+ . = ALIGN(8);
+ __end__ = .;
+ __HeapBase = .;
+ PROVIDE(end = .);
+ . = . + __HEAP_SIZE;
+ . = ALIGN(8);
+ __HeapLimit = .;
+ } > RAM
+
+ .stack (ORIGIN(RAM) + LENGTH(RAM) - __STACK_SIZE) (COPY) :
+ {
+ . = ALIGN(8);
+ __StackLimit = .;
+ . = . + __STACK_SIZE;
+ . = ALIGN(8);
+ __StackTop = .;
+ } > RAM
+ PROVIDE(__stack = __StackTop);
+
+ /* Check if data + heap + stack exceeds RAM limit */
+ ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack")
+}
diff --git a/Platforms/FVP/ARMCM0/LinkScripts/GCC/mem_ARMCM0.h b/Platforms/FVP/ARMCM0/LinkScripts/GCC/mem_ARMCM0.h
new file mode 100755
index 00000000..b25b6182
--- /dev/null
+++ b/Platforms/FVP/ARMCM0/LinkScripts/GCC/mem_ARMCM0.h
@@ -0,0 +1,38 @@
+/**************************************************************************//**
+ * @file mem_ARMCM0.h
+ * @brief Memory base and size definitions (used in scatter file)
+ * @version V1.1.0
+ * @date 15. May 2019
+ *
+ * @note
+ *
+ ******************************************************************************/
+/*
+ * Copyright (c) 2009-2019 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MEM_ARMCM0_H
+#define __MEM_ARMCM0_H
+
+
+
+#define STACK_SIZE 0x00003000
+#define HEAP_SIZE 0x00100000
+
+
+
+#endif /* __MEM_ARMCM7_H */
diff --git a/Platforms/FVP/ARMCM0/Startup/GCC/startup_ARMCM0.S b/Platforms/FVP/ARMCM0/Startup/GCC/startup_ARMCM0.S
new file mode 100755
index 00000000..7bbd2dd6
--- /dev/null
+++ b/Platforms/FVP/ARMCM0/Startup/GCC/startup_ARMCM0.S
@@ -0,0 +1,179 @@
+/**************************************************************************//**
+ * @file startup_ARMCM0.S
+ * @brief CMSIS-Core(M) Device Startup File for Cortex-M0 Device
+ * @version V2.0.1
+ * @date 23. July 2019
+ ******************************************************************************/
+/*
+ * Copyright (c) 2009-2019 Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ .syntax unified
+ .arch armv6-m
+
+ .section .vectors
+ .align 2
+ .globl __Vectors
+ .globl __Vectors_End
+ .globl __Vectors_Size
+__Vectors:
+ .long __StackTop /* Top of Stack */
+ .long Reset_Handler /* Reset Handler */
+ .long NMI_Handler /* -14 NMI Handler */
+ .long HardFault_Handler /* -13 Hard Fault Handler */
+ .long 0 /* Reserved */
+ .long 0 /* Reserved */
+ .long 0 /* Reserved */
+ .long 0 /* Reserved */
+ .long 0 /* Reserved */
+ .long 0 /* Reserved */
+ .long 0 /* Reserved */
+ .long SVC_Handler /* -5 SVCall Handler */
+ .long 0 /* Reserved */
+ .long 0 /* Reserved */
+ .long PendSV_Handler /* -2 PendSV Handler */
+ .long SysTick_Handler /* -1 SysTick Handler */
+
+ /* Interrupts */
+ .long Interrupt0_Handler /* 0 Interrupt 0 */
+ .long Interrupt1_Handler /* 1 Interrupt 1 */
+ .long Interrupt2_Handler /* 2 Interrupt 2 */
+ .long Interrupt3_Handler /* 3 Interrupt 3 */
+ .long Interrupt4_Handler /* 4 Interrupt 4 */
+ .long Interrupt5_Handler /* 5 Interrupt 5 */
+ .long Interrupt6_Handler /* 6 Interrupt 6 */
+ .long Interrupt7_Handler /* 7 Interrupt 7 */
+ .long Interrupt8_Handler /* 8 Interrupt 8 */
+ .long Interrupt9_Handler /* 9 Interrupt 9 */
+
+ .space ( 22 * 4) /* Interrupts 10 .. 31 are left out */
+__Vectors_End:
+ .equ __Vectors_Size, __Vectors_End - __Vectors
+ .size __Vectors, . - __Vectors
+
+
+ .thumb
+ .section .text
+ .align 2
+
+ .thumb_func
+ .type Reset_Handler, %function
+ .globl Reset_Handler
+ .fnstart
+Reset_Handler:
+ bl SystemInit
+
+ ldr r4, =__copy_table_start__
+ ldr r5, =__copy_table_end__
+
+.L_loop0:
+ cmp r4, r5
+ bge .L_loop0_done
+ ldr r1, [r4]
+ ldr r2, [r4, #4]
+ ldr r3, [r4, #8]
+
+.L_loop0_0:
+ subs r3, #4
+ blt .L_loop0_0_done
+ ldr r0, [r1, r3]
+ str r0, [r2, r3]
+ b .L_loop0_0
+
+.L_loop0_0_done:
+ adds r4, #12
+ b .L_loop0
+
+.L_loop0_done:
+
+ ldr r3, =__zero_table_start__
+ ldr r4, =__zero_table_end__
+
+.L_loop2:
+ cmp r3, r4
+ bge .L_loop2_done
+ ldr r1, [r3]
+ ldr r2, [r3, #4]
+ movs r0, 0
+
+.L_loop2_0:
+ subs r2, #4
+ blt .L_loop2_0_done
+ str r0, [r1, r2]
+ b .L_loop2_0
+.L_loop2_0_done:
+
+ adds r3, #8
+ b .L_loop2
+.L_loop2_done:
+
+ bl _start
+
+ .fnend
+ .size Reset_Handler, . - Reset_Handler
+
+/* The default macro is not used for HardFault_Handler
+ * because this results in a poor debug illusion.
+ */
+ .thumb_func
+ .type HardFault_Handler, %function
+ .weak HardFault_Handler
+ .fnstart
+HardFault_Handler:
+ b .
+ .fnend
+ .size HardFault_Handler, . - HardFault_Handler
+
+ .thumb_func
+ .type Default_Handler, %function
+ .weak Default_Handler
+ .fnstart
+Default_Handler:
+ b .
+ .fnend
+ .size Default_Handler, . - Default_Handler
+
+/* Macro to define default exception/interrupt handlers.
+ * Default handler are weak symbols with an endless loop.
+ * They can be overwritten by real handlers.
+ */
+ .macro Set_Default_Handler Handler_Name
+ .weak \Handler_Name
+ .set \Handler_Name, Default_Handler
+ .endm
+
+
+/* Default exception/interrupt handler */
+
+ Set_Default_Handler NMI_Handler
+ Set_Default_Handler SVC_Handler
+ Set_Default_Handler PendSV_Handler
+ Set_Default_Handler SysTick_Handler
+
+ Set_Default_Handler Interrupt0_Handler
+ Set_Default_Handler Interrupt1_Handler
+ Set_Default_Handler Interrupt2_Handler
+ Set_Default_Handler Interrupt3_Handler
+ Set_Default_Handler Interrupt4_Handler
+ Set_Default_Handler Interrupt5_Handler
+ Set_Default_Handler Interrupt6_Handler
+ Set_Default_Handler Interrupt7_Handler
+ Set_Default_Handler Interrupt8_Handler
+ Set_Default_Handler Interrupt9_Handler
+
+
+ .end
diff --git a/Platforms/FVP/ARMCM0/Startup/GCC/support.c b/Platforms/FVP/ARMCM0/Startup/GCC/support.c
new file mode 100755
index 00000000..740f6b08
--- /dev/null
+++ b/Platforms/FVP/ARMCM0/Startup/GCC/support.c
@@ -0,0 +1,36 @@
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+char * _sbrk(int incr);
+
+void __malloc_lock() ;
+void __malloc_unlock();
+
+char __HeapBase, __HeapLimit; // make sure to define these symbols in linker command file
+#ifdef __cplusplus
+}
+#endif
+
+static int totalBytesProvidedBySBRK = 0;
+
+//! sbrk/_sbrk version supporting reentrant newlib (depends upon above symbols defined by linker control file).
+char * sbrk(int incr) {
+ static char *currentHeapEnd = &__HeapBase;
+ char *previousHeapEnd = currentHeapEnd;
+ if (currentHeapEnd + incr > &__HeapLimit) {
+ return (char *)-1; // the malloc-family routine that called sbrk will return 0
+ }
+ currentHeapEnd += incr;
+
+ totalBytesProvidedBySBRK += incr;
+
+ return (char *) previousHeapEnd;
+}
+//! Synonym for sbrk.
+char * _sbrk(int incr) { return sbrk(incr); };
+
+void __malloc_lock() { };
+void __malloc_unlock() { };
\ No newline at end of file
diff --git a/Platforms/FVP/ARMCM7/LinkScripts/AC5/lnk.sct b/Platforms/FVP/ARMCM7/LinkScripts/AC5/lnk.sct
index bc03375e..60b878c9 100755
--- a/Platforms/FVP/ARMCM7/LinkScripts/AC5/lnk.sct
+++ b/Platforms/FVP/ARMCM7/LinkScripts/AC5/lnk.sct
@@ -14,7 +14,7 @@
;
*----------------------------------------------------------------------------*/
#define __ROM_BASE 0x00000000
-#define __ROM_SIZE 0x00200000
+#define __ROM_SIZE 0x00300000
/*--------------------- Embedded RAM Configuration ---------------------------
; RAM Configuration
diff --git a/Platforms/FVP/ARMCM7/LinkScripts/GCC/lnk.ld b/Platforms/FVP/ARMCM7/LinkScripts/GCC/lnk.ld
index dd8398d9..5b1c56a5 100644
--- a/Platforms/FVP/ARMCM7/LinkScripts/GCC/lnk.ld
+++ b/Platforms/FVP/ARMCM7/LinkScripts/GCC/lnk.ld
@@ -33,7 +33,7 @@
-----------------------------------------------------------------------------*/
__ROM_BASE = 0x00000000;
-__ROM_SIZE = 0x00300000;
+__ROM_SIZE = 0x00400000;
/*--------------------- Embedded RAM Configuration ----------------------------
RAM Configuration
diff --git a/Source/BasicMathFunctions/arm_dot_prod_f32.c b/Source/BasicMathFunctions/arm_dot_prod_f32.c
index ef0e2aa0..2bce15bf 100644
--- a/Source/BasicMathFunctions/arm_dot_prod_f32.c
+++ b/Source/BasicMathFunctions/arm_dot_prod_f32.c
@@ -131,7 +131,8 @@ void arm_dot_prod_f32(
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t vec1;
f32x4_t vec2;
- f32x4_t accum = vdupq_n_f32(0);
+ f32x4_t accum = vdupq_n_f32(0);
+ f32x2_t tmp = vdup_n_f32(0);
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@@ -160,7 +161,9 @@ void arm_dot_prod_f32(
#if __aarch64__
sum = vpadds_f32(vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)));
#else
- sum = (vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)))[0] + (vpadd_f32(vget_low_f32(accum), vget_high_f32(accum)))[1];
+ tmp = vpadd_f32(vget_low_f32(accum), vget_high_f32(accum));
+ sum = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1);
+
#endif
/* Tail */
diff --git a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c
index 243a76e6..27bd793f 100755
--- a/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c
+++ b/Source/BayesFunctions/arm_gaussian_naive_bayes_predict_f32.c
@@ -228,10 +228,10 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
vecBlkCnt--;
}
tmpV2 = vpadd_f32(vget_low_f32(tmpV),vget_high_f32(tmpV));
- tmp += tmpV2[0] + tmpV2[1];
-
+ tmp += vget_lane_f32(tmpV2, 0) + vget_lane_f32(tmpV2, 1);
+
tmpV2 = vpadd_f32(vget_low_f32(tmpV1),vget_high_f32(tmpV1));
- tmp1 += tmpV2[0] + tmpV2[1];
+ tmp1 += vget_lane_f32(tmpV2, 0) + vget_lane_f32(tmpV2, 1);
vecBlkCnt = S->vectorDimension & 3;
while(vecBlkCnt > 0)
@@ -300,7 +300,7 @@ uint32_t arm_gaussian_naive_bayes_predict_f32(const arm_gaussian_naive_bayes_ins
vecBlkCnt--;
}
tmpV2 = vpadd_f32(vget_low_f32(tmpV),vget_high_f32(tmpV));
- tmp += tmpV2[0] + tmpV2[1];
+ tmp += vget_lane_f32(tmpV2, 0) + vget_lane_f32(tmpV2, 1);
vecBlkCnt = S->vectorDimension & 3;
while(vecBlkCnt > 0)
diff --git a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c
index 1f747ae9..4925f02c 100644
--- a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c
+++ b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c
@@ -169,7 +169,7 @@ void arm_cmplx_dot_prod_f32(
/* C = (A[0]+jA[1])*(B[0]+jB[1]) + ... */
/* Calculate dot product and then store the result in a temporary buffer. */
- vec1 = vld2q_f32(pSrcA);
+ vec1 = vld2q_f32(pSrcA);
vec2 = vld2q_f32(pSrcB);
/* Increment pointers */
@@ -204,10 +204,10 @@ void arm_cmplx_dot_prod_f32(
}
accum = vpadd_f32(vget_low_f32(accR), vget_high_f32(accR));
- real_sum += accum[0] + accum[1];
+ real_sum += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(accI), vget_high_f32(accI));
- imag_sum += accum[0] + accum[1];
+ imag_sum += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
/* Tail */
blkCnt = numSamples & 0x7;
diff --git a/Source/DistanceFunctions/arm_boolean_distance_template.h b/Source/DistanceFunctions/arm_boolean_distance_template.h
index f54adecf..6888e9e0 100755
--- a/Source/DistanceFunctions/arm_boolean_distance_template.h
+++ b/Source/DistanceFunctions/arm_boolean_distance_template.h
@@ -351,16 +351,16 @@ void FUNC(EXT)(const uint32_t *pA
}
#ifdef TT
- _ctt += tmp4tt[0] + tmp4tt[1];
+ _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
#endif
#ifdef FF
- _cff += tmp4ff[0] + tmp4ff[1];
+ _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
#endif
#ifdef TF
- _ctf += tmp4tf[0] + tmp4tf[1];
+ _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
#endif
#ifdef FT
- _cft += tmp4ft[0] + tmp4ft[1];
+ _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
#endif
nbBoolBlock = numberOfBools & 0x7F;
diff --git a/Source/DistanceFunctions/arm_braycurtis_distance_f32.c b/Source/DistanceFunctions/arm_braycurtis_distance_f32.c
index 154827ed..38c9d18e 100755
--- a/Source/DistanceFunctions/arm_braycurtis_distance_f32.c
+++ b/Source/DistanceFunctions/arm_braycurtis_distance_f32.c
@@ -144,10 +144,10 @@ float32_t arm_braycurtis_distance_f32(const float32_t *pA,const float32_t *pB, u
blkCnt --;
}
accumV2 = vpadd_f32(vget_low_f32(accumDiffV),vget_high_f32(accumDiffV));
- accumDiff = accumV2[0] + accumV2[1];
+ accumDiff = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
accumV2 = vpadd_f32(vget_low_f32(accumSumV),vget_high_f32(accumSumV));
- accumSum = accumV2[0] + accumV2[1];
+ accumSum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
blkCnt = blockSize & 3;
while(blkCnt > 0)
diff --git a/Source/DistanceFunctions/arm_canberra_distance_f32.c b/Source/DistanceFunctions/arm_canberra_distance_f32.c
index 219d2bba..f71b74d3 100755
--- a/Source/DistanceFunctions/arm_canberra_distance_f32.c
+++ b/Source/DistanceFunctions/arm_canberra_distance_f32.c
@@ -135,7 +135,7 @@ float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uin
uint32_t blkCnt;
float32x4_t a,b,c,accumV;
float32x2_t accumV2;
- int32x4_t isZeroV;
+ uint32x4_t isZeroV;
float32x4_t zeroV = vdupq_n_f32(0.0f);
accumV = vdupq_n_f32(0.0f);
@@ -162,7 +162,7 @@ float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uin
* Force result of a division by 0 to 0. It the behavior of the
* sklearn canberra function.
*/
- a = vbicq_s32(a,isZeroV);
+ a = vreinterpretq_f32_s32(vbicq_s32(vreinterpretq_s32_f32(a),vreinterpretq_s32_u32(isZeroV)));
c = vmulq_f32(c,a);
accumV = vaddq_f32(accumV,c);
@@ -171,7 +171,7 @@ float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uin
blkCnt --;
}
accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
- accum = accumV2[0] + accumV2[1];
+ accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
blkCnt = blockSize & 3;
diff --git a/Source/DistanceFunctions/arm_chebyshev_distance_f32.c b/Source/DistanceFunctions/arm_chebyshev_distance_f32.c
index 349158ac..f6a47365 100755
--- a/Source/DistanceFunctions/arm_chebyshev_distance_f32.c
+++ b/Source/DistanceFunctions/arm_chebyshev_distance_f32.c
@@ -158,7 +158,7 @@ float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, ui
}
maxValV2 = vpmax_f32(vget_low_f32(maxValV),vget_high_f32(maxValV));
maxValV2 = vpmax_f32(maxValV2,maxValV2);
- maxVal = maxValV2[0];
+ maxVal = vget_lane_f32(maxValV2,0);
blkCnt = blockSize & 3;
diff --git a/Source/DistanceFunctions/arm_cityblock_distance_f32.c b/Source/DistanceFunctions/arm_cityblock_distance_f32.c
index 88173fa8..8dce697c 100755
--- a/Source/DistanceFunctions/arm_cityblock_distance_f32.c
+++ b/Source/DistanceFunctions/arm_cityblock_distance_f32.c
@@ -115,7 +115,7 @@ float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, ui
}
accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
accumV2 = vpadd_f32(accumV2,accumV2);
- accum = accumV2[0];
+ accum = vget_lane_f32(accumV2,0);
blkCnt = blockSize & 3;
diff --git a/Source/DistanceFunctions/arm_euclidean_distance_f32.c b/Source/DistanceFunctions/arm_euclidean_distance_f32.c
index af59ffbe..34df6d7d 100755
--- a/Source/DistanceFunctions/arm_euclidean_distance_f32.c
+++ b/Source/DistanceFunctions/arm_euclidean_distance_f32.c
@@ -114,7 +114,7 @@ float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, ui
blkCnt --;
}
accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
- accum = accumV2[0] + accumV2[1];
+ accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
blkCnt = blockSize & 3;
while(blkCnt > 0)
diff --git a/Source/DistanceFunctions/arm_jensenshannon_distance_f32.c b/Source/DistanceFunctions/arm_jensenshannon_distance_f32.c
index 3c3a4fac..6673e472 100755
--- a/Source/DistanceFunctions/arm_jensenshannon_distance_f32.c
+++ b/Source/DistanceFunctions/arm_jensenshannon_distance_f32.c
@@ -168,7 +168,7 @@ float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB
}
accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
- accum = accumV2[0] + accumV2[1];
+ accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
blkCnt = blockSize & 3;
while(blkCnt > 0)
diff --git a/Source/DistanceFunctions/arm_minkowski_distance_f32.c b/Source/DistanceFunctions/arm_minkowski_distance_f32.c
index bf30fe55..3d550fb4 100755
--- a/Source/DistanceFunctions/arm_minkowski_distance_f32.c
+++ b/Source/DistanceFunctions/arm_minkowski_distance_f32.c
@@ -126,7 +126,7 @@ float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, in
}
sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
- sum = sumV2[0] + sumV2[1];
+ sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
blkCnt = blockSize & 3;
while(blkCnt > 0)
diff --git a/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c b/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c
index c9745914..0abc1659 100644
--- a/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_f32.c
@@ -370,19 +370,20 @@ void arm_biquad_cascade_df1_f32(
while (stage > 0U)
{
/* Reading the coefficients */
- Xn = vdupq_n_f32(0.0);
- Xn[2] = pState[0];
- Xn[3] = pState[1];
- Yn[0] = pState[2];
- Yn[1] = pState[3];
+ Xn = vdupq_n_f32(0.0f);
+ Xn = vsetq_lane_f32(pState[0],Xn,2);
+ Xn = vsetq_lane_f32(pState[1],Xn,3);
+ Yn = vset_lane_f32(pState[2],Yn,0);
+ Yn = vset_lane_f32(pState[3],Yn,1);
+
b = vld1q_f32(pCoeffs);
b = vrev64q_f32(b);
b = vcombine_f32(vget_high_f32(b), vget_low_f32(b));
a = vld1_f32(pCoeffs + 3);
a = vrev64_f32(a);
- b[0] = 0.0;
+ b = vsetq_lane_f32(0.0f,b,0);
pCoeffs += 5;
/* Reading the pState values */
@@ -460,7 +461,11 @@ void arm_biquad_cascade_df1_f32(
Xns = *pIn++;
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
- acc = (b[1] * Xn[2]) + (b[2] * Xn[3]) + (b[3] * Xns) + (a[0] * Yn[0]) + (a[1] * Yn[1]);
+ acc = (vgetq_lane_f32(b, 1) * vgetq_lane_f32(Xn, 2))
+ + (vgetq_lane_f32(b, 2) * vgetq_lane_f32(Xn, 3))
+ + (vgetq_lane_f32(b, 3) * Xns)
+ + (vget_lane_f32(a, 0) * vget_lane_f32(Yn, 0))
+ + (vget_lane_f32(a, 1) * vget_lane_f32(Yn, 1));
/* Store the result in the accumulator in the destination buffer. */
*pOut++ = acc;
@@ -471,10 +476,10 @@ void arm_biquad_cascade_df1_f32(
/* Xn1 = Xn */
/* Yn2 = Yn1 */
/* Yn1 = acc */
- Xn[2] = Xn[3];
- Xn[3] = Xns;
- Yn[0] = Yn[1];
- Yn[1] = acc;
+ Xn = vsetq_lane_f32(vgetq_lane_f32(Xn, 3),Xn,2);
+ Xn = vsetq_lane_f32(Xns,Xn,3);
+ Yn = vset_lane_f32(vget_lane_f32(Yn, 1),Yn,0);
+ Yn = vset_lane_f32(acc,Yn,1);
/* Decrement the loop counter */
sample--;
diff --git a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c
index 574dc28b..46454dad 100644
--- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c
+++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c
@@ -268,7 +268,7 @@ void arm_biquad_cascade_df2T_f32(
dV.val[1] = vmulq_f32(s, b2V);
dV.val[1] = vmlaq_f32(dV.val[1], YnV, a2V);
- *pOut++ = YnV[3];
+ *pOut++ = vgetq_lane_f32(YnV, 3) ;
sample--;
}
diff --git a/Source/FilteringFunctions/arm_conv_f32.c b/Source/FilteringFunctions/arm_conv_f32.c
index ea64b80f..f217f1ed 100644
--- a/Source/FilteringFunctions/arm_conv_f32.c
+++ b/Source/FilteringFunctions/arm_conv_f32.c
@@ -263,7 +263,8 @@ void arm_conv_f32(
uint32_t blockSize1, blockSize2, blockSize3; /* Loop counters */
uint32_t j, k, count, blkCnt; /* Loop counters */
-#if defined (ARM_MATH_LOOPUNROLL)
+
+#if defined (ARM_MATH_LOOPUNROLL) || defined(ARM_MATH_NEON)
float32_t acc0, acc1, acc2, acc3, c0; /* Accumulators */
#if !defined(ARM_MATH_NEON)
float32_t x0, x1, x2, x3; /* Temporary variables to hold state and coefficient values */
diff --git a/Source/FilteringFunctions/arm_conv_partial_f32.c b/Source/FilteringFunctions/arm_conv_partial_f32.c
index 179b6dd0..34c77f24 100644
--- a/Source/FilteringFunctions/arm_conv_partial_f32.c
+++ b/Source/FilteringFunctions/arm_conv_partial_f32.c
@@ -142,7 +142,7 @@ arm_status arm_conv_partial_f32(
blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
- blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t) numPoints) : 0;
+ blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0;
blockSize2 = ((int32_t) check - blockSize3) - (blockSize1 + (int32_t) firstIndex);
blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
diff --git a/Source/FilteringFunctions/arm_conv_partial_fast_q31.c b/Source/FilteringFunctions/arm_conv_partial_fast_q31.c
index 9d934f65..fe2e2bbb 100644
--- a/Source/FilteringFunctions/arm_conv_partial_fast_q31.c
+++ b/Source/FilteringFunctions/arm_conv_partial_fast_q31.c
@@ -118,7 +118,7 @@ arm_status arm_conv_partial_fast_q31(
blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
- blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t) numPoints) : 0;
+ blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0;
blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
diff --git a/Source/FilteringFunctions/arm_conv_partial_q15.c b/Source/FilteringFunctions/arm_conv_partial_q15.c
index b8a965be..eb246417 100644
--- a/Source/FilteringFunctions/arm_conv_partial_q15.c
+++ b/Source/FilteringFunctions/arm_conv_partial_q15.c
@@ -119,7 +119,7 @@ arm_status arm_conv_partial_q15(
blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
- blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t) numPoints) : 0;
+ blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0;
blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
diff --git a/Source/FilteringFunctions/arm_conv_partial_q31.c b/Source/FilteringFunctions/arm_conv_partial_q31.c
index 388ef6e7..cfb1ad01 100644
--- a/Source/FilteringFunctions/arm_conv_partial_q31.c
+++ b/Source/FilteringFunctions/arm_conv_partial_q31.c
@@ -121,7 +121,7 @@ arm_status arm_conv_partial_q31(
blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
- blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t) numPoints) : 0;
+ blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0;
blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
diff --git a/Source/FilteringFunctions/arm_conv_partial_q7.c b/Source/FilteringFunctions/arm_conv_partial_q7.c
index 1123591c..bcfc51eb 100644
--- a/Source/FilteringFunctions/arm_conv_partial_q7.c
+++ b/Source/FilteringFunctions/arm_conv_partial_q7.c
@@ -123,7 +123,7 @@ arm_status arm_conv_partial_q7(
blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
- blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : (int32_t) numPoints) : 0;
+ blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1U)) ? blockSize1 : numPoints) : 0;
blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
diff --git a/Source/FilteringFunctions/arm_correlate_f32.c b/Source/FilteringFunctions/arm_correlate_f32.c
index 3a8aa00e..4dd5bb93 100644
--- a/Source/FilteringFunctions/arm_correlate_f32.c
+++ b/Source/FilteringFunctions/arm_correlate_f32.c
@@ -320,7 +320,7 @@ void arm_correlate_f32(
uint32_t outBlockSize; /* Loop counter */
int32_t inc = 1; /* Destination address modifier */
-#if defined (ARM_MATH_LOOPUNROLL)
+#if defined (ARM_MATH_LOOPUNROLL) || defined(ARM_MATH_NEON)
float32_t acc0, acc1, acc2, acc3,c0; /* Accumulators */
#if !defined(ARM_MATH_NEON)
float32_t x0, x1, x2, x3; /* temporary variables for holding input and coefficient values */
diff --git a/Source/FilteringFunctions/arm_fir_decimate_f32.c b/Source/FilteringFunctions/arm_fir_decimate_f32.c
index 6ebbe49e..7d445dda 100644
--- a/Source/FilteringFunctions/arm_fir_decimate_f32.c
+++ b/Source/FilteringFunctions/arm_fir_decimate_f32.c
@@ -219,16 +219,16 @@ void arm_fir_decimate_f32(
}
temp = vpadd_f32(vget_low_f32(acc0v),vget_high_f32(acc0v));
- accv[0] = temp[0] + temp[1];
+ accv = vsetq_lane_f32(vget_lane_f32(temp, 0) + vget_lane_f32(temp, 1),accv,0);
temp = vpadd_f32(vget_low_f32(acc1v),vget_high_f32(acc1v));
- accv[1] = temp[0] + temp[1];
+ accv = vsetq_lane_f32(vget_lane_f32(temp, 0) + vget_lane_f32(temp, 1),accv,1);
temp = vpadd_f32(vget_low_f32(acc2v),vget_high_f32(acc2v));
- accv[2] = temp[0] + temp[1];
+ accv = vsetq_lane_f32(vget_lane_f32(temp, 0) + vget_lane_f32(temp, 1),accv,2);
temp = vpadd_f32(vget_low_f32(acc3v),vget_high_f32(acc3v));
- accv[3] = temp[0] + temp[1];
+ accv = vsetq_lane_f32(vget_lane_f32(temp, 0) + vget_lane_f32(temp, 1),accv,3);
/* If the filter length is not a multiple of 4, compute the remaining filter taps */
tapCnt = numTaps % 0x4U;
@@ -245,10 +245,10 @@ void arm_fir_decimate_f32(
x3 = *(px3++);
/* Perform the multiply-accumulate */
- accv[0] += x0 * c0;
- accv[1] += x1 * c0;
- accv[2] += x2 * c0;
- accv[3] += x3 * c0;
+ accv = vsetq_lane_f32(vgetq_lane_f32(accv, 0) + x0 * c0,accv,0);
+ accv = vsetq_lane_f32(vgetq_lane_f32(accv, 1) + x1 * c0,accv,1);
+ accv = vsetq_lane_f32(vgetq_lane_f32(accv, 2) + x2 * c0,accv,2);
+ accv = vsetq_lane_f32(vgetq_lane_f32(accv, 3) + x3 * c0,accv,3);
/* Decrement the loop counter */
tapCnt--;
@@ -306,7 +306,7 @@ void arm_fir_decimate_f32(
}
temp = vpadd_f32(vget_low_f32(sum0v),vget_high_f32(sum0v));
- sum0 = temp[0] + temp[1];
+ sum0 = vget_lane_f32(temp, 0) + vget_lane_f32(temp, 1);
/* If the filter length is not a multiple of 4, compute the remaining filter taps */
tapCnt = numTaps % 0x4U;
diff --git a/Source/FilteringFunctions/arm_fir_f32.c b/Source/FilteringFunctions/arm_fir_f32.c
index d20a9b7d..2f9632e5 100644
--- a/Source/FilteringFunctions/arm_fir_f32.c
+++ b/Source/FilteringFunctions/arm_fir_f32.c
@@ -767,26 +767,26 @@ uint32_t blockSize)
b = vld1q_f32(pb);
xa = x0;
xb = x1;
- accv0 = vmlaq_n_f32(accv0,xa,b[0]);
- accv1 = vmlaq_n_f32(accv1,xb,b[0]);
+ accv0 = vmlaq_n_f32(accv0,xa,vgetq_lane_f32(b, 0));
+ accv1 = vmlaq_n_f32(accv1,xb,vgetq_lane_f32(b, 0));
xa = vextq_f32(x0,x1,1);
xb = vextq_f32(x1,x2,1);
-
- accv0 = vmlaq_n_f32(accv0,xa,b[1]);
- accv1 = vmlaq_n_f32(accv1,xb,b[1]);
+
+ accv0 = vmlaq_n_f32(accv0,xa,vgetq_lane_f32(b, 1));
+ accv1 = vmlaq_n_f32(accv1,xb,vgetq_lane_f32(b, 1));
- xa = vextq_f32(x0,x1,2);
+ xa = vextq_f32(x0,x1,2);
xb = vextq_f32(x1,x2,2);
- accv0 = vmlaq_n_f32(accv0,xa,b[2]);
- accv1 = vmlaq_n_f32(accv1,xb,b[2]);
+ accv0 = vmlaq_n_f32(accv0,xa,vgetq_lane_f32(b, 2));
+ accv1 = vmlaq_n_f32(accv1,xb,vgetq_lane_f32(b, 2));
- xa = vextq_f32(x0,x1,3);
- xb = vextq_f32(x1,x2,3);
+ xa = vextq_f32(x0,x1,3);
+ xb = vextq_f32(x1,x2,3);
- accv0 = vmlaq_n_f32(accv0,xa,b[3]);
- accv1 = vmlaq_n_f32(accv1,xb,b[3]);
+ accv0 = vmlaq_n_f32(accv0,xa,vgetq_lane_f32(b, 3));
+ accv1 = vmlaq_n_f32(accv1,xb,vgetq_lane_f32(b, 3));
pb += 4;
x0 = x1;
@@ -810,8 +810,8 @@ uint32_t blockSize)
pb++;
- xa = vextq_f32(x0,x1,1);
- xb = vextq_f32(x1,x2,1);
+ xa = vextq_f32(x0,x1,1);
+ xb = vextq_f32(x1,x2,1);
accv0 = vmlaq_n_f32(accv0,xa,*pb);
accv1 = vmlaq_n_f32(accv1,xb,*pb);
@@ -821,7 +821,7 @@ uint32_t blockSize)
xa = vextq_f32(x0,x1,2);
xb = vextq_f32(x1,x2,2);
- accv0 = vmlaq_n_f32(accv0,xa,*pb);
+ accv0 = vmlaq_n_f32(accv0,xa,*pb);
accv1 = vmlaq_n_f32(accv1,xb,*pb);
}
@@ -836,7 +836,7 @@ uint32_t blockSize)
xa = vextq_f32(x0,x1,1);
xb = vextq_f32(x1,x2,1);
- accv0 = vmlaq_n_f32(accv0,xa,*pb);
+ accv0 = vmlaq_n_f32(accv0,xa,*pb);
accv1 = vmlaq_n_f32(accv1,xb,*pb);
}
diff --git a/Source/FilteringFunctions/arm_fir_interpolate_f32.c b/Source/FilteringFunctions/arm_fir_interpolate_f32.c
index 23ba08d7..726549cb 100644
--- a/Source/FilteringFunctions/arm_fir_interpolate_f32.c
+++ b/Source/FilteringFunctions/arm_fir_interpolate_f32.c
@@ -314,15 +314,15 @@ void arm_fir_interpolate_f32(
}
/* The result is in the accumulator, store in the destination buffer. */
- *pDst = accV0[0];
- *(pDst + S->L) = accV0[1];
- *(pDst + 2 * S->L) = accV0[2];
- *(pDst + 3 * S->L) = accV0[3];
+ *pDst = vgetq_lane_f32(accV0, 0);
+ *(pDst + S->L) = vgetq_lane_f32(accV0, 1);
+ *(pDst + 2 * S->L) = vgetq_lane_f32(accV0, 2);
+ *(pDst + 3 * S->L) = vgetq_lane_f32(accV0, 3);
- *(pDst + 4 * S->L) = accV1[0];
- *(pDst + 5 * S->L) = accV1[1];
- *(pDst + 6 * S->L) = accV1[2];
- *(pDst + 7 * S->L) = accV1[3];
+ *(pDst + 4 * S->L) = vgetq_lane_f32(accV1, 0);
+ *(pDst + 5 * S->L) = vgetq_lane_f32(accV1, 1);
+ *(pDst + 6 * S->L) = vgetq_lane_f32(accV1, 2);
+ *(pDst + 7 * S->L) = vgetq_lane_f32(accV1, 3);
pDst++;
@@ -375,7 +375,7 @@ void arm_fir_interpolate_f32(
while (tapCnt > 0U)
{
/* Read the coefficient */
- x1v[0] = *(ptr2);
+ x1v = vsetq_lane_f32(*(ptr2),x1v,0);
/* Upsampling is done by stuffing L-1 zeros between each sample.
* So instead of multiplying zeros with coefficients,
@@ -387,19 +387,19 @@ void arm_fir_interpolate_f32(
ptr1 += 4;
/* Read the coefficient */
- x1v[1] = *(ptr2);
+ x1v = vsetq_lane_f32(*(ptr2),x1v,1);
/* Increment the coefficient pointer by interpolation factor times. */
ptr2 += S->L;
/* Read the coefficient */
- x1v[2] = *(ptr2);
+ x1v = vsetq_lane_f32(*(ptr2),x1v,2);
/* Increment the coefficient pointer by interpolation factor times. */
ptr2 += S->L;
/* Read the coefficient */
- x1v[3] = *(ptr2);
+ x1v = vsetq_lane_f32(*(ptr2),x1v,3);
/* Increment the coefficient pointer by interpolation factor times. */
ptr2 += S->L;
@@ -411,7 +411,7 @@ void arm_fir_interpolate_f32(
}
tempV = vpadd_f32(vget_low_f32(sum0v),vget_high_f32(sum0v));
- sum0 = tempV[0] + tempV[1];
+ sum0 = vget_lane_f32(tempV, 0) + vget_lane_f32(tempV, 1);
/* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
tapCnt = phaseLen % 0x4U;
diff --git a/Source/FilteringFunctions/arm_lms_f32.c b/Source/FilteringFunctions/arm_lms_f32.c
index 4fc6e7e2..4a863594 100644
--- a/Source/FilteringFunctions/arm_lms_f32.c
+++ b/Source/FilteringFunctions/arm_lms_f32.c
@@ -225,7 +225,7 @@ void arm_lms_f32(
tapCnt--;
}
tempV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
- sum = tempV2[0] + tempV2[1];
+ sum = vget_lane_f32(tempV2, 0) + vget_lane_f32(tempV2, 1);
/* If the filter length is not a multiple of 4, compute the remaining filter taps */
diff --git a/Source/FilteringFunctions/arm_lms_norm_f32.c b/Source/FilteringFunctions/arm_lms_norm_f32.c
index f62494d0..693d45f8 100644
--- a/Source/FilteringFunctions/arm_lms_norm_f32.c
+++ b/Source/FilteringFunctions/arm_lms_norm_f32.c
@@ -233,7 +233,7 @@ void arm_lms_norm_f32(
tapCnt--;
}
tempV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
- sum = tempV2[0] + tempV2[1];
+ sum = vget_lane_f32(tempV2, 0) + vget_lane_f32(tempV2, 1);
/* If the filter length is not a multiple of 4, compute the remaining filter taps */
tapCnt = numTaps % 0x4U;
diff --git a/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c b/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c
index e65bb42e..1b79eabe 100644
--- a/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c
+++ b/Source/MatrixFunctions/arm_mat_cmplx_mult_f32.c
@@ -938,20 +938,21 @@ arm_status arm_mat_cmplx_mult_f32(
pIn1 += 8;
pIn1B += 8;
- tempR[0] = *pIn2;
- tempI[0] = *(pIn2 + 1U);
+ tempR = vsetq_lane_f32(*pIn2,tempR,0);
+ tempI = vsetq_lane_f32(*(pIn2 + 1U),tempI,0);
pIn2 += 2 * numColsB;
- tempR[1] = *pIn2;
- tempI[1] = *(pIn2 + 1U);
+
+ tempR = vsetq_lane_f32(*pIn2,tempR,1);
+ tempI = vsetq_lane_f32(*(pIn2 + 1U),tempI,1);
pIn2 += 2 * numColsB;
- tempR[2] = *pIn2;
- tempI[2] = *(pIn2 + 1U);
+ tempR = vsetq_lane_f32(*pIn2,tempR,2);
+ tempI = vsetq_lane_f32(*(pIn2 + 1U),tempI,2);
pIn2 += 2 * numColsB;
- tempR[3] = *pIn2;
- tempI[3] = *(pIn2 + 1U);
+ tempR = vsetq_lane_f32(*pIn2,tempR,3);
+ tempI = vsetq_lane_f32(*(pIn2 + 1U),tempI,3);
pIn2 += 2 * numColsB;
accR0 = vmlaq_f32(accR0,a0V.val[0],tempR);
@@ -971,16 +972,16 @@ arm_status arm_mat_cmplx_mult_f32(
}
accum = vpadd_f32(vget_low_f32(accR0), vget_high_f32(accR0));
- sumReal1 += accum[0] + accum[1];
+ sumReal1 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(accI0), vget_high_f32(accI0));
- sumImag1 += accum[0] + accum[1];
+ sumImag1 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(accR1), vget_high_f32(accR1));
- sumReal1B += accum[0] + accum[1];
+ sumReal1B += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(accI1), vget_high_f32(accI1));
- sumImag1B += accum[0] + accum[1];
+ sumImag1B += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
/* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
@@ -1088,20 +1089,20 @@ arm_status arm_mat_cmplx_mult_f32(
a0V = vld2q_f32(pIn1); // load & separate real/imag pSrcA (de-interleave 2)
pIn1 += 8;
- tempR[0] = *pIn2;
- tempI[0] = *(pIn2 + 1U);
+ tempR = vsetq_lane_f32(*pIn2,tempR,0);
+ tempI = vsetq_lane_f32(*(pIn2 + 1U),tempI,0);
pIn2 += 2 * numColsB;
- tempR[1] = *pIn2;
- tempI[1] = *(pIn2 + 1U);
+ tempR = vsetq_lane_f32(*pIn2,tempR,1);
+ tempI = vsetq_lane_f32(*(pIn2 + 1U),tempI,1);
pIn2 += 2 * numColsB;
- tempR[2] = *pIn2;
- tempI[2] = *(pIn2 + 1U);
+ tempR = vsetq_lane_f32(*pIn2,tempR,2);
+ tempI = vsetq_lane_f32(*(pIn2 + 1U),tempI,2);
pIn2 += 2 * numColsB;
- tempR[3] = *pIn2;
- tempI[3] = *(pIn2 + 1U);
+ tempR = vsetq_lane_f32(*pIn2,tempR,3);
+ tempI = vsetq_lane_f32(*(pIn2 + 1U),tempI,3);
pIn2 += 2 * numColsB;
accR0 = vmlaq_f32(accR0,a0V.val[0],tempR);
@@ -1115,10 +1116,10 @@ arm_status arm_mat_cmplx_mult_f32(
}
accum = vpadd_f32(vget_low_f32(accR0), vget_high_f32(accR0));
- sumReal1 += accum[0] + accum[1];
+ sumReal1 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(accI0), vget_high_f32(accI0));
- sumImag1 += accum[0] + accum[1];
+ sumImag1 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
/* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
diff --git a/Source/MatrixFunctions/arm_mat_inverse_f32.c b/Source/MatrixFunctions/arm_mat_inverse_f32.c
index ca19f97b..7e7ca80e 100644
--- a/Source/MatrixFunctions/arm_mat_inverse_f32.c
+++ b/Source/MatrixFunctions/arm_mat_inverse_f32.c
@@ -774,7 +774,7 @@ arm_status arm_mat_inverse_f32(
/* Pivot element of the row */
in = *pPivotRowIn;
- tmpV = vdupq_n_f32(1.0/in);
+ tmpV = vdupq_n_f32(1.0f/in);
/* Loop over number of columns
* to the right of the pilot element */
diff --git a/Source/MatrixFunctions/arm_mat_mult_f32.c b/Source/MatrixFunctions/arm_mat_mult_f32.c
index 577c1f19..90b8e952 100644
--- a/Source/MatrixFunctions/arm_mat_mult_f32.c
+++ b/Source/MatrixFunctions/arm_mat_mult_f32.c
@@ -633,7 +633,7 @@ arm_status arm_mat_mult_f32(
a6V = vld1q_f32(pIn1G);
a7V = vld1q_f32(pIn1H);
- pIn1 += 4;
+ pIn1 += 4;
pIn1B += 4;
pIn1C += 4;
pIn1D += 4;
@@ -642,13 +642,13 @@ arm_status arm_mat_mult_f32(
pIn1G += 4;
pIn1H += 4;
- temp[0] = *pIn2;
+ temp = vsetq_lane_f32(*pIn2,temp,0);
pIn2 += numColsB;
- temp[1] = *pIn2;
+ temp = vsetq_lane_f32(*pIn2,temp,1);
pIn2 += numColsB;
- temp[2] = *pIn2;
+ temp = vsetq_lane_f32(*pIn2,temp,2);
pIn2 += numColsB;
- temp[3] = *pIn2;
+ temp = vsetq_lane_f32(*pIn2,temp,3);
pIn2 += numColsB;
acc0 = vmlaq_f32(acc0,a0V,temp);
@@ -665,28 +665,28 @@ arm_status arm_mat_mult_f32(
}
accum = vpadd_f32(vget_low_f32(acc0), vget_high_f32(acc0));
- sum0 += accum[0] + accum[1];
+ sum0 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(acc1), vget_high_f32(acc1));
- sum1 += accum[0] + accum[1];
+ sum1 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(acc2), vget_high_f32(acc2));
- sum2 += accum[0] + accum[1];
+ sum2 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(acc3), vget_high_f32(acc3));
- sum3 += accum[0] + accum[1];
+ sum3 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(acc4), vget_high_f32(acc4));
- sum4 += accum[0] + accum[1];
+ sum4 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(acc5), vget_high_f32(acc5));
- sum5 += accum[0] + accum[1];
+ sum5 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(acc6), vget_high_f32(acc6));
- sum6 += accum[0] + accum[1];
+ sum6 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
accum = vpadd_f32(vget_low_f32(acc7), vget_high_f32(acc7));
- sum7 += accum[0] + accum[1];
+ sum7 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
/* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
@@ -782,13 +782,13 @@ arm_status arm_mat_mult_f32(
a0V = vld1q_f32(pIn1); // load & separate real/imag pSrcA (de-interleave 2)
pIn1 += 4;
- temp[0] = *pIn2;
+ temp = vsetq_lane_f32(*pIn2,temp,0);
pIn2 += numColsB;
- temp[1] = *pIn2;
+ temp = vsetq_lane_f32(*pIn2,temp,1);
pIn2 += numColsB;
- temp[2] = *pIn2;
+ temp = vsetq_lane_f32(*pIn2,temp,2);
pIn2 += numColsB;
- temp[3] = *pIn2;
+ temp = vsetq_lane_f32(*pIn2,temp,3);
pIn2 += numColsB;
acc0 = vmlaq_f32(acc0,a0V,temp);
@@ -798,7 +798,7 @@ arm_status arm_mat_mult_f32(
}
accum = vpadd_f32(vget_low_f32(acc0), vget_high_f32(acc0));
- sum += accum[0] + accum[1];
+ sum += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1);
/* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here.
** No loop unrolling is used. */
diff --git a/Source/SVMFunctions/arm_svm_linear_predict_f32.c b/Source/SVMFunctions/arm_svm_linear_predict_f32.c
index d9c9ffd6..503ba16f 100755
--- a/Source/SVMFunctions/arm_svm_linear_predict_f32.c
+++ b/Source/SVMFunctions/arm_svm_linear_predict_f32.c
@@ -344,25 +344,25 @@ void arm_svm_linear_predict_f32(
blkCnt -- ;
}
accum2 = vpadd_f32(vget_low_f32(accuma),vget_high_f32(accuma));
- dotV[0] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,0);
accum2 = vpadd_f32(vget_low_f32(accumb),vget_high_f32(accumb));
- dotV[1] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,1);
accum2 = vpadd_f32(vget_low_f32(accumc),vget_high_f32(accumc));
- dotV[2] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,2);
accum2 = vpadd_f32(vget_low_f32(accumd),vget_high_f32(accumd));
- dotV[3] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,3);
blkCnt = S->vectorDimension & 3;
while (blkCnt > 0U)
{
- dotV[0] = dotV[0] + *pIn * *pSupporta++;
- dotV[1] = dotV[1] + *pIn * *pSupportb++;
- dotV[2] = dotV[2] + *pIn * *pSupportc++;
- dotV[3] = dotV[3] + *pIn * *pSupportd++;
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,0) + *pIn * *pSupporta++, dotV,0);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,1) + *pIn * *pSupportb++, dotV,1);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,2) + *pIn * *pSupportc++, dotV,2);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,3) + *pIn * *pSupportd++, dotV,3);
pIn++;
@@ -374,7 +374,7 @@ void arm_svm_linear_predict_f32(
accum = vmulq_f32(vec1,dotV);
accum2 = vpadd_f32(vget_low_f32(accum),vget_high_f32(accum));
- sum += accum2[0] + accum2[1];
+ sum += vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1);
pSupporta += 3*S->vectorDimension;
pSupportb += 3*S->vectorDimension;
@@ -406,7 +406,7 @@ void arm_svm_linear_predict_f32(
blkCnt -- ;
}
accum2 = vpadd_f32(vget_low_f32(accum),vget_high_f32(accum));
- dot = accum2[0] + accum2[1];
+ dot = vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1);
blkCnt = S->vectorDimension & 3;
diff --git a/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c b/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c
index 703ab243..0873bff6 100755
--- a/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c
+++ b/Source/SVMFunctions/arm_svm_polynomial_predict_f32.c
@@ -370,25 +370,25 @@ void arm_svm_polynomial_predict_f32(
blkCnt -- ;
}
accum2 = vpadd_f32(vget_low_f32(accuma),vget_high_f32(accuma));
- dotV[0] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,0);
accum2 = vpadd_f32(vget_low_f32(accumb),vget_high_f32(accumb));
- dotV[1] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,1);
accum2 = vpadd_f32(vget_low_f32(accumc),vget_high_f32(accumc));
- dotV[2] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,2);
accum2 = vpadd_f32(vget_low_f32(accumd),vget_high_f32(accumd));
- dotV[3] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,3);
blkCnt = S->vectorDimension & 3;
while (blkCnt > 0U)
{
- dotV[0] = dotV[0] + *pIn * *pSupporta++;
- dotV[1] = dotV[1] + *pIn * *pSupportb++;
- dotV[2] = dotV[2] + *pIn * *pSupportc++;
- dotV[3] = dotV[3] + *pIn * *pSupportd++;
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,0) + *pIn * *pSupporta++, dotV,0);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,1) + *pIn * *pSupportb++, dotV,1);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,2) + *pIn * *pSupportc++, dotV,2);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,3) + *pIn * *pSupportd++, dotV,3);
pIn++;
@@ -406,7 +406,7 @@ void arm_svm_polynomial_predict_f32(
accum = vmulq_f32(vec1,dotV);
accum2 = vpadd_f32(vget_low_f32(accum),vget_high_f32(accum));
- sum += accum2[0] + accum2[1];
+ sum += vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1);
pSupporta += 3*S->vectorDimension;
pSupportb += 3*S->vectorDimension;
@@ -439,7 +439,7 @@ void arm_svm_polynomial_predict_f32(
blkCnt -- ;
}
accum2 = vpadd_f32(vget_low_f32(accum),vget_high_f32(accum));
- dot = accum2[0] + accum2[1];
+ dot = vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1);
blkCnt = S->vectorDimension & 3;
diff --git a/Source/SVMFunctions/arm_svm_rbf_predict_f32.c b/Source/SVMFunctions/arm_svm_rbf_predict_f32.c
index a4fa0318..8625ef49 100755
--- a/Source/SVMFunctions/arm_svm_rbf_predict_f32.c
+++ b/Source/SVMFunctions/arm_svm_rbf_predict_f32.c
@@ -396,25 +396,26 @@ void arm_svm_rbf_predict_f32(
blkCnt -- ;
}
accum2 = vpadd_f32(vget_low_f32(accuma),vget_high_f32(accuma));
- dotV[0] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,0);
accum2 = vpadd_f32(vget_low_f32(accumb),vget_high_f32(accumb));
- dotV[1] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,1);
accum2 = vpadd_f32(vget_low_f32(accumc),vget_high_f32(accumc));
- dotV[2] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,2);
accum2 = vpadd_f32(vget_low_f32(accumd),vget_high_f32(accumd));
- dotV[3] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,3);
blkCnt = S->vectorDimension & 3;
while (blkCnt > 0U)
{
- dotV[0] = dotV[0] + SQ(*pIn - *pSupporta);
- dotV[1] = dotV[1] + SQ(*pIn - *pSupportb);
- dotV[2] = dotV[2] + SQ(*pIn - *pSupportc);
- dotV[3] = dotV[3] + SQ(*pIn - *pSupportd);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,0) + SQ(*pIn - *pSupporta), dotV,0);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,1) + SQ(*pIn - *pSupportb), dotV,1);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,2) + SQ(*pIn - *pSupportc), dotV,2);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,3) + SQ(*pIn - *pSupportd), dotV,3);
+
pSupporta++;
pSupportb++;
pSupportc++;
@@ -434,7 +435,7 @@ void arm_svm_rbf_predict_f32(
accum = vmulq_f32(vec1,dotV);
accum2 = vpadd_f32(vget_low_f32(accum),vget_high_f32(accum));
- sum += accum2[0] + accum2[1];
+ sum += vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1);
pSupporta += 3*S->vectorDimension;
pSupportb += 3*S->vectorDimension;
@@ -468,7 +469,7 @@ void arm_svm_rbf_predict_f32(
blkCnt -- ;
}
accum2 = vpadd_f32(vget_low_f32(accum),vget_high_f32(accum));
- dot = accum2[0] + accum2[1];
+ dot = vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1);
blkCnt = S->vectorDimension & 3;
diff --git a/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c b/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c
index 9be478a0..f3b2f2f4 100755
--- a/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c
+++ b/Source/SVMFunctions/arm_svm_sigmoid_predict_f32.c
@@ -368,25 +368,25 @@ void arm_svm_sigmoid_predict_f32(
blkCnt -- ;
}
accum2 = vpadd_f32(vget_low_f32(accuma),vget_high_f32(accuma));
- dotV[0] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,0);
accum2 = vpadd_f32(vget_low_f32(accumb),vget_high_f32(accumb));
- dotV[1] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,1);
accum2 = vpadd_f32(vget_low_f32(accumc),vget_high_f32(accumc));
- dotV[2] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,2);
accum2 = vpadd_f32(vget_low_f32(accumd),vget_high_f32(accumd));
- dotV[3] = accum2[0] + accum2[1];
+ dotV = vsetq_lane_f32(vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1),dotV,3);
blkCnt = S->vectorDimension & 3;
while (blkCnt > 0U)
{
- dotV[0] = dotV[0] + *pIn * *pSupporta++;
- dotV[1] = dotV[1] + *pIn * *pSupportb++;
- dotV[2] = dotV[2] + *pIn * *pSupportc++;
- dotV[3] = dotV[3] + *pIn * *pSupportd++;
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,0) + *pIn * *pSupporta++, dotV,0);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,1) + *pIn * *pSupportb++, dotV,1);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,2) + *pIn * *pSupportc++, dotV,2);
+ dotV = vsetq_lane_f32(vgetq_lane_f32(dotV,3) + *pIn * *pSupportd++, dotV,3);
pIn++;
@@ -404,7 +404,7 @@ void arm_svm_sigmoid_predict_f32(
accum = vmulq_f32(vec1,dotV);
accum2 = vpadd_f32(vget_low_f32(accum),vget_high_f32(accum));
- sum += accum2[0] + accum2[1];
+ sum += vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1);
pSupporta += 3*S->vectorDimension;
pSupportb += 3*S->vectorDimension;
@@ -437,7 +437,7 @@ void arm_svm_sigmoid_predict_f32(
blkCnt -- ;
}
accum2 = vpadd_f32(vget_low_f32(accum),vget_high_f32(accum));
- dot = accum2[0] + accum2[1];
+ dot = vget_lane_f32(accum2, 0) + vget_lane_f32(accum2, 1);
blkCnt = S->vectorDimension & 3;
diff --git a/Source/StatisticsFunctions/arm_entropy_f32.c b/Source/StatisticsFunctions/arm_entropy_f32.c
index 9b0b02e9..01f7bc78 100755
--- a/Source/StatisticsFunctions/arm_entropy_f32.c
+++ b/Source/StatisticsFunctions/arm_entropy_f32.c
@@ -125,7 +125,8 @@ float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize)
}
accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
- accum = accumV2[0] + accumV2[1];
+ accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
+
blkCnt = blockSize & 3;
while(blkCnt > 0)
diff --git a/Source/StatisticsFunctions/arm_kullback_leibler_f32.c b/Source/StatisticsFunctions/arm_kullback_leibler_f32.c
index b2618621..c406e84d 100755
--- a/Source/StatisticsFunctions/arm_kullback_leibler_f32.c
+++ b/Source/StatisticsFunctions/arm_kullback_leibler_f32.c
@@ -142,7 +142,7 @@ float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSr
}
accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
- accum = accumV2[0] + accumV2[1];
+ accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
blkCnt = blockSize & 3;
while(blkCnt > 0)
diff --git a/Source/StatisticsFunctions/arm_logsumexp_f32.c b/Source/StatisticsFunctions/arm_logsumexp_f32.c
index 2170ad99..03f778da 100755
--- a/Source/StatisticsFunctions/arm_logsumexp_f32.c
+++ b/Source/StatisticsFunctions/arm_logsumexp_f32.c
@@ -173,7 +173,7 @@ float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
accumV2 = vpmax_f32(vget_low_f32(maxValV),vget_high_f32(maxValV));
accumV2 = vpmax_f32(accumV2,accumV2);
- maxVal = accumV2[0];
+ maxVal = vget_lane_f32(accumV2, 0) ;
blkCnt = (blockSize - 4) & 3;
@@ -211,7 +211,7 @@ float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
}
accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
- accum = accumV2[0] + accumV2[1];
+ accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
blkCnt = blockSize & 0x3;
while(blkCnt > 0)
diff --git a/Source/StatisticsFunctions/arm_max_f32.c b/Source/StatisticsFunctions/arm_max_f32.c
index 18520cdf..677ad1dc 100644
--- a/Source/StatisticsFunctions/arm_max_f32.c
+++ b/Source/StatisticsFunctions/arm_max_f32.c
@@ -150,20 +150,26 @@ void arm_max_f32(
uint32_t * pIndex)
{
float32_t maxVal1, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex, count; /* loop counter */
+ uint32_t blkCnt, outIndex; /* loop counter */
float32x4_t outV, srcV;
float32x2_t outV2;
uint32x4_t idxV;
- uint32x4_t maxIdx={ULONG_MAX,ULONG_MAX,ULONG_MAX,ULONG_MAX};
- uint32x4_t index={4,5,6,7};
- uint32x4_t delta={4,4,4,4};
- uint32x4_t countV={0,1,2,3};
+ uint32x4_t maxIdx;
+ static const uint32_t indexInit[4]={4,5,6,7};
+ static const uint32_t countVInit[4]={0,1,2,3};
+
+ uint32x4_t index;
+ uint32x4_t delta;
+ uint32x4_t countV;
uint32x2_t countV2;
- /* Initialise the count value. */
- count = 0U;
+ maxIdx = vdupq_n_u32(ULONG_MAX);
+ delta = vdupq_n_u32(4);
+ index = vld1q_u32(indexInit);
+ countV = vld1q_u32(countVInit);
+
/* Initialise the index value to zero. */
outIndex = 0U;
@@ -217,14 +223,14 @@ void arm_max_f32(
outV2 = vpmax_f32(vget_low_f32(outV),vget_high_f32(outV));
outV2 = vpmax_f32(outV2,outV2);
- out = outV2[0];
+ out = vget_lane_f32(outV2, 0);
idxV = vceqq_f32(outV, vdupq_n_f32(out));
countV = vbslq_u32(idxV, countV,maxIdx);
countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
countV2 = vpmin_u32(countV2,countV2);
- outIndex = countV2[0];
+ outIndex = vget_lane_u32(countV2,0);
/* if (blockSize - 1U) is not multiple of 4 */
blkCnt = (blockSize - 4 ) % 4U;
diff --git a/Source/StatisticsFunctions/arm_max_no_idx_f32.c b/Source/StatisticsFunctions/arm_max_no_idx_f32.c
index 76fe44ef..ccb31c71 100755
--- a/Source/StatisticsFunctions/arm_max_no_idx_f32.c
+++ b/Source/StatisticsFunctions/arm_max_no_idx_f32.c
@@ -135,4 +135,4 @@ void arm_max_no_idx_f32(
/**
@} end of Max group
- */
\ No newline at end of file
+ */
diff --git a/Source/StatisticsFunctions/arm_min_f32.c b/Source/StatisticsFunctions/arm_min_f32.c
index 8b789d2d..7a4d2ab2 100644
--- a/Source/StatisticsFunctions/arm_min_f32.c
+++ b/Source/StatisticsFunctions/arm_min_f32.c
@@ -152,20 +152,24 @@ void arm_min_f32(
uint32_t * pIndex)
{
float32_t maxVal1, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex, count; /* loop counter */
+ uint32_t blkCnt, outIndex; /* loop counter */
float32x4_t outV, srcV;
float32x2_t outV2;
uint32x4_t idxV;
- uint32x4_t maxIdx={ULONG_MAX,ULONG_MAX,ULONG_MAX,ULONG_MAX};
- uint32x4_t index={4,5,6,7};
- uint32x4_t delta={4,4,4,4};
- uint32x4_t countV={0,1,2,3};
+ static const uint32_t indexInit[4]={4,5,6,7};
+ static const uint32_t countVInit[4]={0,1,2,3};
+ uint32x4_t maxIdx;
+ uint32x4_t index;
+ uint32x4_t delta;
+ uint32x4_t countV;
uint32x2_t countV2;
- /* Initialise the count value. */
- count = 0U;
+ maxIdx = vdupq_n_u32(ULONG_MAX);
+ delta = vdupq_n_u32(4);
+ index = vld1q_u32(indexInit);
+ countV = vld1q_u32(countVInit);
/* Initialise the index value to zero. */
outIndex = 0U;
@@ -219,14 +223,14 @@ void arm_min_f32(
outV2 = vpmin_f32(vget_low_f32(outV),vget_high_f32(outV));
outV2 = vpmin_f32(outV2,outV2);
- out = outV2[0];
+ out = vget_lane_f32(outV2,0);
idxV = vceqq_f32(outV, vdupq_n_f32(out));
countV = vbslq_u32(idxV, countV,maxIdx);
countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
countV2 = vpmin_u32(countV2,countV2);
- outIndex = countV2[0];
+ outIndex = vget_lane_u32(countV2,0);
/* if (blockSize - 1U) is not multiple of 4 */
blkCnt = (blockSize - 4 ) % 4U;
diff --git a/Source/StatisticsFunctions/arm_power_f32.c b/Source/StatisticsFunctions/arm_power_f32.c
index 3acbf2be..afe40cd4 100644
--- a/Source/StatisticsFunctions/arm_power_f32.c
+++ b/Source/StatisticsFunctions/arm_power_f32.c
@@ -136,7 +136,7 @@ void arm_power_f32(
blkCnt--;
}
sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
- sum = sumV2[0] + sumV2[1];
+ sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
diff --git a/Source/StatisticsFunctions/arm_rms_f32.c b/Source/StatisticsFunctions/arm_rms_f32.c
index 118f5393..7f26aff2 100644
--- a/Source/StatisticsFunctions/arm_rms_f32.c
+++ b/Source/StatisticsFunctions/arm_rms_f32.c
@@ -103,7 +103,7 @@ void arm_rms_f32(
}
sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
- sum = sumV2[0] + sumV2[1];
+ sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
diff --git a/Source/SupportFunctions/arm_bitonic_sort_f32.c b/Source/SupportFunctions/arm_bitonic_sort_f32.c
index 6f83e306..10267056 100644
--- a/Source/SupportFunctions/arm_bitonic_sort_f32.c
+++ b/Source/SupportFunctions/arm_bitonic_sort_f32.c
@@ -834,47 +834,41 @@ static void arm_bitonic_merge_256_f32(float32_t * pSrc, uint8_t dir)
arm_bitonic_merge_128_f32(pSrc+128, dir);
}
+#define SWAP(a,i,j) \
+ temp = vgetq_lane_f32(a, j); \
+ a = vsetq_lane_f32(vgetq_lane_f32(a, i), a, j);\
+ a = vsetq_lane_f32(temp, a, i);
+
static float32x4_t arm_bitonic_sort_4_f32(float32x4_t a, uint8_t dir)
{
float32_t temp;
- if( dir==(a[0]>a[1]) )
+
+ if( dir==(vgetq_lane_f32(a, 0) > vgetq_lane_f32(a, 1)) )
{
- temp = a[1];
- a[1] = a[0];
- a[0] = temp;
+ SWAP(a,0,1);
}
- if( dir==(a[2]>a[3]) )
+ if( dir==(vgetq_lane_f32(a, 2) > vgetq_lane_f32(a, 3)) )
{
- temp = a[3];
- a[3] = a[2];
- a[2] = temp;
+ SWAP(a,2,3);
}
- if( dir==(a[0]>a[3]) )
+ if( dir==(vgetq_lane_f32(a, 0) > vgetq_lane_f32(a, 3)) )
{
- temp = a[3];
- a[3] = a[0];
- a[0] = temp;
+ SWAP(a,0,3);
}
- if( dir==(a[1]>a[2]) )
+ if( dir==(vgetq_lane_f32(a, 1) > vgetq_lane_f32(a, 2)) )
{
- temp = a[2];
- a[2] = a[1];
- a[1] = temp;
+ SWAP(a,1,2);
}
- if( dir==(a[0]>a[1]) )
+ if( dir==(vgetq_lane_f32(a, 0) > vgetq_lane_f32(a, 1)) )
{
- temp = a[1];
- a[1] = a[0];
- a[0] = temp;
+ SWAP(a,0,1);
}
- if( dir==(a[2]>a[3]) )
+ if( dir==(vgetq_lane_f32(a, 2)>vgetq_lane_f32(a, 3)) )
{
- temp = a[3];
- a[3] = a[2];
- a[2] = temp;
+ SWAP(a,2,3);
}
return a;
diff --git a/Source/SupportFunctions/arm_spline_interp_f32.c b/Source/SupportFunctions/arm_spline_interp_f32.c
index cfbf3763..3ec7c642 100644
--- a/Source/SupportFunctions/arm_spline_interp_f32.c
+++ b/Source/SupportFunctions/arm_spline_interp_f32.c
@@ -157,6 +157,11 @@ void arm_spline_f32(
float32_t * pDst,
uint32_t blockSize)
{
+ /*
+
+ As explained in arm_spline_interp_init_f32.c, this must be > 1
+
+ */
int32_t n = S->n_x;
arm_spline_type type = S->type;
@@ -169,6 +174,9 @@ void arm_spline_f32(
float32_t bi, di;
float32_t x_sc;
+ bi = 0.0f;
+ di = 0.0f;
+
const float32_t * pXq = xq;
int32_t blkCnt = (int32_t)blockSize;
diff --git a/Source/SupportFunctions/arm_spline_interp_init_f32.c b/Source/SupportFunctions/arm_spline_interp_init_f32.c
index f9b5ee91..e014f201 100644
--- a/Source/SupportFunctions/arm_spline_interp_init_f32.c
+++ b/Source/SupportFunctions/arm_spline_interp_init_f32.c
@@ -40,7 +40,7 @@
/**
* @brief Initialization function for the floating-point cubic spline interpolation.
* @param[in,out] S points to an instance of the floating-point spline structure.
- * @param[in] n number of known data points.
+ * @param[in] n number of known data points (must > 1).
* @param[in] type type of cubic spline interpolation (boundary conditions)
*/
diff --git a/Source/SupportFunctions/arm_weighted_sum_f32.c b/Source/SupportFunctions/arm_weighted_sum_f32.c
index 405c6c70..ac3d4819 100755
--- a/Source/SupportFunctions/arm_weighted_sum_f32.c
+++ b/Source/SupportFunctions/arm_weighted_sum_f32.c
@@ -136,10 +136,10 @@ float32_t arm_weighted_sum_f32(const float32_t *in,const float32_t *weigths, uin
}
tempV = vpadd_f32(vget_low_f32(accum1V),vget_high_f32(accum1V));
- accum1 = tempV[0] + tempV[1];
+ accum1 = vget_lane_f32(tempV, 0) + vget_lane_f32(tempV, 1);
tempV = vpadd_f32(vget_low_f32(accum2V),vget_high_f32(accum2V));
- accum2 = tempV[0] + tempV[1];
+ accum2 = vget_lane_f32(tempV, 0) + vget_lane_f32(tempV, 1);
blkCnt = blockSize & 3;
while(blkCnt > 0)
diff --git a/Testing/FrameworkSource/Error.cpp b/Testing/FrameworkSource/Error.cpp
index fedddc22..c621c8fd 100644
--- a/Testing/FrameworkSource/Error.cpp
+++ b/Testing/FrameworkSource/Error.cpp
@@ -25,9 +25,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include "arm_math.h"
-#include "Error.h"
#include
+#include
+#include "Error.h"
+#include "arm_math.h"
namespace Client {
diff --git a/Testing/FrameworkSource/IORunner.cpp b/Testing/FrameworkSource/IORunner.cpp
index 4ebd0436..3c9e62e9 100644
--- a/Testing/FrameworkSource/IORunner.cpp
+++ b/Testing/FrameworkSource/IORunner.cpp
@@ -33,6 +33,7 @@
#include
#include
#include
+#include
#include "IORunner.h"
#include "Error.h"
#include "Timing.h"
diff --git a/Testing/RTE_Components.h b/Testing/RTE_Components.h
index db747a66..81b7e8dc 100644
--- a/Testing/RTE_Components.h
+++ b/Testing/RTE_Components.h
@@ -2,4 +2,4 @@
#define RTE_COMPONENTS_H
-#endif /* RTE_COMPONENTS_H */
\ No newline at end of file
+#endif /* RTE_COMPONENTS_H */
diff --git a/Testing/Source/Tests/BIQUADF32.cpp b/Testing/Source/Tests/BIQUADF32.cpp
index a34b4d4a..7486ad8d 100755
--- a/Testing/Source/Tests/BIQUADF32.cpp
+++ b/Testing/Source/Tests/BIQUADF32.cpp
@@ -1,4 +1,5 @@
#include "BIQUADF32.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 98
@@ -31,10 +32,7 @@ static __ALIGNED(8) float32_t coeffArray[32];
arm_biquad_mod_coef_f32 *coefsmodp = (arm_biquad_mod_coef_f32*)vecCoefs.ptr();
#endif
- int i,j;
int blockSize;
- int numTaps;
- int nb=0;
@@ -48,7 +46,6 @@ static __ALIGNED(8) float32_t coeffArray[32];
*/
blockSize = inputs.nbSamples() >> 1;
- numTaps = coefs.nbSamples();
/*
@@ -107,10 +104,7 @@ static __ALIGNED(8) float32_t coeffArray[32];
const float32_t *inputp = inputs.ptr();
float32_t *outp = output.ptr();
- int i,j;
int blockSize;
- int numTaps;
- int nb=0;
@@ -124,7 +118,6 @@ static __ALIGNED(8) float32_t coeffArray[32];
*/
blockSize = inputs.nbSamples() >> 1;
- numTaps = coefs.nbSamples();
/*
@@ -189,7 +182,6 @@ static __ALIGNED(8) float32_t coeffArray[32];
int blockSize;
int numStages;
- int nb=0;
int i;
@@ -267,7 +259,6 @@ static __ALIGNED(8) float32_t coeffArray[32];
int blockSize;
int numStages;
- int nb=0;
int i;
@@ -350,7 +341,6 @@ static __ALIGNED(8) float32_t coeffArray[32];
int blockSize;
int numStages;
- int nb=0;
int i;
@@ -410,8 +400,6 @@ static __ALIGNED(8) float32_t coeffArray[32];
void BIQUADF32::setUp(Testing::testID_t id,std::vector& params,Client::PatternMgr *mgr)
{
- Testing::nbSamples_t nb=MAX_NB_SAMPLES;
-
switch(id)
{
diff --git a/Testing/Source/Tests/BIQUADF64.cpp b/Testing/Source/Tests/BIQUADF64.cpp
index 6faa3486..8c64ee0f 100755
--- a/Testing/Source/Tests/BIQUADF64.cpp
+++ b/Testing/Source/Tests/BIQUADF64.cpp
@@ -1,4 +1,5 @@
#include "BIQUADF64.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 98
@@ -28,10 +29,7 @@ static __ALIGNED(8) float64_t coeffArray[64];
float64_t *inputp = inputs.ptr();
float64_t *outp = output.ptr();
- int i,j;
int blockSize;
- int numTaps;
- int nb=0;
@@ -45,7 +43,6 @@ static __ALIGNED(8) float64_t coeffArray[64];
*/
blockSize = inputs.nbSamples() >> 1;
- numTaps = coefs.nbSamples();
/*
@@ -98,7 +95,6 @@ static __ALIGNED(8) float64_t coeffArray[64];
int blockSize;
int numStages;
- int nb=0;
int i;
@@ -158,8 +154,6 @@ static __ALIGNED(8) float64_t coeffArray[64];
void BIQUADF64::setUp(Testing::testID_t id,std::vector& params,Client::PatternMgr *mgr)
{
- Testing::nbSamples_t nb=MAX_NB_SAMPLES;
-
switch(id)
{
diff --git a/Testing/Source/Tests/BIQUADQ15.cpp b/Testing/Source/Tests/BIQUADQ15.cpp
index b0985284..5b2625d0 100755
--- a/Testing/Source/Tests/BIQUADQ15.cpp
+++ b/Testing/Source/Tests/BIQUADQ15.cpp
@@ -1,4 +1,5 @@
#include "BIQUADQ15.h"
+#include
#include "Error.h"
@@ -26,10 +27,7 @@ static __ALIGNED(8) q15_t coeffArray[32];
const q15_t *inputp = inputs.ptr();
q15_t *outp = output.ptr();
- int i,j;
int blockSize;
- int numTaps;
- int nb=0;
@@ -43,7 +41,6 @@ static __ALIGNED(8) q15_t coeffArray[32];
*/
blockSize = inputs.nbSamples() >> 1;
- numTaps = coefs.nbSamples();
/*
@@ -85,7 +82,6 @@ static __ALIGNED(8) q15_t coeffArray[32];
void BIQUADQ15::setUp(Testing::testID_t id,std::vector& params,Client::PatternMgr *mgr)
{
- Testing::nbSamples_t nb=MAX_NB_SAMPLES;
switch(id)
diff --git a/Testing/Source/Tests/BIQUADQ31.cpp b/Testing/Source/Tests/BIQUADQ31.cpp
index c9bf9a14..14dc4586 100755
--- a/Testing/Source/Tests/BIQUADQ31.cpp
+++ b/Testing/Source/Tests/BIQUADQ31.cpp
@@ -1,4 +1,5 @@
#include "BIQUADQ31.h"
+#include
#include "Error.h"
@@ -24,10 +25,7 @@ static __ALIGNED(8) q31_t coeffArray[32];
const q31_t *inputp = inputs.ptr();
q31_t *outp = output.ptr();
- int i,j;
int blockSize;
- int numTaps;
- int nb=0;
@@ -41,7 +39,6 @@ static __ALIGNED(8) q31_t coeffArray[32];
*/
blockSize = inputs.nbSamples() >> 1;
- numTaps = coefs.nbSamples();
/*
@@ -87,10 +84,7 @@ static __ALIGNED(8) q31_t coeffArray[32];
q31_t *inputp = inputs.ptr();
q31_t *outp = output.ptr();
- int i,j;
int blockSize;
- int numTaps;
- int nb=0;
@@ -104,7 +98,6 @@ static __ALIGNED(8) q31_t coeffArray[32];
*/
blockSize = inputs.nbSamples() >> 1;
- numTaps = coefs.nbSamples();
/*
@@ -145,8 +138,6 @@ static __ALIGNED(8) q31_t coeffArray[32];
void BIQUADQ31::setUp(Testing::testID_t id,std::vector& params,Client::PatternMgr *mgr)
{
- Testing::nbSamples_t nb=MAX_NB_SAMPLES;
-
switch(id)
{
diff --git a/Testing/Source/Tests/BasicTestsF32.cpp b/Testing/Source/Tests/BasicTestsF32.cpp
index a447caec..a00dec56 100644
--- a/Testing/Source/Tests/BasicTestsF32.cpp
+++ b/Testing/Source/Tests/BasicTestsF32.cpp
@@ -1,4 +1,5 @@
#include "BasicTestsF32.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 120
diff --git a/Testing/Source/Tests/BasicTestsQ15.cpp b/Testing/Source/Tests/BasicTestsQ15.cpp
index df24ed71..9369dd6f 100755
--- a/Testing/Source/Tests/BasicTestsQ15.cpp
+++ b/Testing/Source/Tests/BasicTestsQ15.cpp
@@ -1,4 +1,5 @@
#include "BasicTestsQ15.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 70
diff --git a/Testing/Source/Tests/BasicTestsQ31.cpp b/Testing/Source/Tests/BasicTestsQ31.cpp
index cc322ed6..bae5d4d9 100755
--- a/Testing/Source/Tests/BasicTestsQ31.cpp
+++ b/Testing/Source/Tests/BasicTestsQ31.cpp
@@ -1,4 +1,5 @@
#include "BasicTestsQ31.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 100
diff --git a/Testing/Source/Tests/BasicTestsQ7.cpp b/Testing/Source/Tests/BasicTestsQ7.cpp
index 83cc0b79..8c6474dc 100755
--- a/Testing/Source/Tests/BasicTestsQ7.cpp
+++ b/Testing/Source/Tests/BasicTestsQ7.cpp
@@ -1,4 +1,5 @@
#include "BasicTestsQ7.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 20
diff --git a/Testing/Source/Tests/BayesF32.cpp b/Testing/Source/Tests/BayesF32.cpp
index 656d7e6a..bb109454 100755
--- a/Testing/Source/Tests/BayesF32.cpp
+++ b/Testing/Source/Tests/BayesF32.cpp
@@ -1,9 +1,9 @@
#include "BayesF32.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
void BayesF32::test_gaussian_naive_bayes_predict_f32()
diff --git a/Testing/Source/Tests/BinaryTestsF32.cpp b/Testing/Source/Tests/BinaryTestsF32.cpp
index 10a00f54..ef9bd85d 100755
--- a/Testing/Source/Tests/BinaryTestsF32.cpp
+++ b/Testing/Source/Tests/BinaryTestsF32.cpp
@@ -1,4 +1,5 @@
#include "BinaryTestsF32.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 120
diff --git a/Testing/Source/Tests/BinaryTestsQ15.cpp b/Testing/Source/Tests/BinaryTestsQ15.cpp
index 1f16ab38..67cc38af 100755
--- a/Testing/Source/Tests/BinaryTestsQ15.cpp
+++ b/Testing/Source/Tests/BinaryTestsQ15.cpp
@@ -1,4 +1,5 @@
#include "BinaryTestsQ15.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 70
diff --git a/Testing/Source/Tests/BinaryTestsQ31.cpp b/Testing/Source/Tests/BinaryTestsQ31.cpp
index 31b88214..0701748e 100755
--- a/Testing/Source/Tests/BinaryTestsQ31.cpp
+++ b/Testing/Source/Tests/BinaryTestsQ31.cpp
@@ -1,4 +1,5 @@
#include "BinaryTestsQ31.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 100
diff --git a/Testing/Source/Tests/ComplexTestsF32.cpp b/Testing/Source/Tests/ComplexTestsF32.cpp
index c77ae7f9..e0b91888 100755
--- a/Testing/Source/Tests/ComplexTestsF32.cpp
+++ b/Testing/Source/Tests/ComplexTestsF32.cpp
@@ -1,4 +1,5 @@
#include "ComplexTestsF32.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 120
diff --git a/Testing/Source/Tests/ComplexTestsQ15.cpp b/Testing/Source/Tests/ComplexTestsQ15.cpp
index a9bff1a2..2e3bfa76 100755
--- a/Testing/Source/Tests/ComplexTestsQ15.cpp
+++ b/Testing/Source/Tests/ComplexTestsQ15.cpp
@@ -1,4 +1,5 @@
#include "ComplexTestsQ15.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 25
diff --git a/Testing/Source/Tests/ComplexTestsQ31.cpp b/Testing/Source/Tests/ComplexTestsQ31.cpp
index 86c03811..e76b3bb5 100755
--- a/Testing/Source/Tests/ComplexTestsQ31.cpp
+++ b/Testing/Source/Tests/ComplexTestsQ31.cpp
@@ -1,4 +1,5 @@
#include "ComplexTestsQ31.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 100
diff --git a/Testing/Source/Tests/DistanceTestsF32.cpp b/Testing/Source/Tests/DistanceTestsF32.cpp
index c1e21ec7..b75beeef 100755
--- a/Testing/Source/Tests/DistanceTestsF32.cpp
+++ b/Testing/Source/Tests/DistanceTestsF32.cpp
@@ -1,9 +1,9 @@
#include "DistanceTestsF32.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
void DistanceTestsF32::test_braycurtis_distance_f32()
diff --git a/Testing/Source/Tests/DistanceTestsU32.cpp b/Testing/Source/Tests/DistanceTestsU32.cpp
index c31c6417..877b4db1 100755
--- a/Testing/Source/Tests/DistanceTestsU32.cpp
+++ b/Testing/Source/Tests/DistanceTestsU32.cpp
@@ -1,10 +1,9 @@
#include "DistanceTestsU32.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
-
#define ERROR_THRESHOLD 1e-8
void DistanceTestsU32::test_dice_distance()
diff --git a/Testing/Source/Tests/ExampleCategoryF32.cpp b/Testing/Source/Tests/ExampleCategoryF32.cpp
index ef38b936..249204ea 100755
--- a/Testing/Source/Tests/ExampleCategoryF32.cpp
+++ b/Testing/Source/Tests/ExampleCategoryF32.cpp
@@ -1,4 +1,5 @@
#include "ExampleCategoryF32.h"
+#include
#include "Error.h"
/*
diff --git a/Testing/Source/Tests/ExampleCategoryQ15.cpp b/Testing/Source/Tests/ExampleCategoryQ15.cpp
index 0a60e701..f88b926b 100755
--- a/Testing/Source/Tests/ExampleCategoryQ15.cpp
+++ b/Testing/Source/Tests/ExampleCategoryQ15.cpp
@@ -1,4 +1,5 @@
#include "ExampleCategoryQ15.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 70
diff --git a/Testing/Source/Tests/ExampleCategoryQ31.cpp b/Testing/Source/Tests/ExampleCategoryQ31.cpp
index 478e851d..50a4724c 100755
--- a/Testing/Source/Tests/ExampleCategoryQ31.cpp
+++ b/Testing/Source/Tests/ExampleCategoryQ31.cpp
@@ -1,4 +1,5 @@
#include "ExampleCategoryQ31.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 100
diff --git a/Testing/Source/Tests/ExampleCategoryQ7.cpp b/Testing/Source/Tests/ExampleCategoryQ7.cpp
index e598aec0..e4ba59f8 100755
--- a/Testing/Source/Tests/ExampleCategoryQ7.cpp
+++ b/Testing/Source/Tests/ExampleCategoryQ7.cpp
@@ -1,4 +1,5 @@
#include "ExampleCategoryQ7.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 20
diff --git a/Testing/Source/Tests/FIRF32.cpp b/Testing/Source/Tests/FIRF32.cpp
index 1bf9c418..5523a48f 100644
--- a/Testing/Source/Tests/FIRF32.cpp
+++ b/Testing/Source/Tests/FIRF32.cpp
@@ -1,4 +1,5 @@
#include "FIRF32.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 120
@@ -27,10 +28,12 @@ static __ALIGNED(8) float32_t coeffArray[32];
const float32_t *inputp = inputs.ptr();
float32_t *outp = output.ptr();
- int i,j;
+ int i;
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+ int j;
+#endif
int blockSize;
int numTaps;
- int nb=0;
@@ -93,7 +96,6 @@ static __ALIGNED(8) float32_t coeffArray[32];
configp += 2;
orgcoefsp += numTaps;
- nb += 2*blockSize;
}
@@ -109,7 +111,6 @@ static __ALIGNED(8) float32_t coeffArray[32];
void FIRF32::setUp(Testing::testID_t id,std::vector& params,Client::PatternMgr *mgr)
{
- Testing::nbSamples_t nb=MAX_NB_SAMPLES;
switch(id)
diff --git a/Testing/Source/Tests/FIRQ15.cpp b/Testing/Source/Tests/FIRQ15.cpp
index 10f9eb85..052f219d 100644
--- a/Testing/Source/Tests/FIRQ15.cpp
+++ b/Testing/Source/Tests/FIRQ15.cpp
@@ -1,4 +1,5 @@
#include "FIRQ15.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 59
@@ -21,10 +22,12 @@ static __ALIGNED(8) q15_t coeffArray[32];
const q15_t *inputp = inputs.ptr();
q15_t *outp = output.ptr();
- int i,j;
+ int i;
+#if defined(ARM_MATH_MVEI)
+ int j;
+#endif
int blockSize;
int numTaps;
- int nb=0;
/*
@@ -86,7 +89,6 @@ static __ALIGNED(8) q15_t coeffArray[32];
configp += 2;
orgcoefsp += numTaps;
- nb += 2*blockSize;
}
@@ -102,8 +104,6 @@ static __ALIGNED(8) q15_t coeffArray[32];
void FIRQ15::setUp(Testing::testID_t id,std::vector& params,Client::PatternMgr *mgr)
{
- Testing::nbSamples_t nb=MAX_NB_SAMPLES;
-
switch(id)
{
diff --git a/Testing/Source/Tests/FIRQ31.cpp b/Testing/Source/Tests/FIRQ31.cpp
index 16832077..c3d4a405 100644
--- a/Testing/Source/Tests/FIRQ31.cpp
+++ b/Testing/Source/Tests/FIRQ31.cpp
@@ -1,4 +1,5 @@
#include "FIRQ31.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 100
@@ -20,10 +21,12 @@ static __ALIGNED(8) q31_t coeffArray[32];
const q31_t *inputp = inputs.ptr();
q31_t *outp = output.ptr();
- int i,j;
+ int i;
+#if defined(ARM_MATH_MVEI)
+ int j;
+#endif
int blockSize;
int numTaps;
- int nb=0;
/*
@@ -83,7 +86,6 @@ static __ALIGNED(8) q31_t coeffArray[32];
configp += 2;
orgcoefsp += numTaps;
- nb += 2*blockSize;
}
@@ -99,8 +101,6 @@ static __ALIGNED(8) q31_t coeffArray[32];
void FIRQ31::setUp(Testing::testID_t id,std::vector& params,Client::PatternMgr *mgr)
{
- Testing::nbSamples_t nb=MAX_NB_SAMPLES;
-
switch(id)
{
diff --git a/Testing/Source/Tests/FIRQ7.cpp b/Testing/Source/Tests/FIRQ7.cpp
index e079389e..329d37c9 100644
--- a/Testing/Source/Tests/FIRQ7.cpp
+++ b/Testing/Source/Tests/FIRQ7.cpp
@@ -1,4 +1,5 @@
#include "FIRQ7.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 10
@@ -21,7 +22,10 @@ static __ALIGNED(8) q7_t coeffArray[32];
const q7_t *inputp = inputs.ptr();
q7_t *outp = output.ptr();
- int i,j;
+ int i;
+#if defined(ARM_MATH_MVEI)
+ int j;
+#endif
int blockSize;
int numTaps;
@@ -97,8 +101,6 @@ static __ALIGNED(8) q7_t coeffArray[32];
void FIRQ7::setUp(Testing::testID_t id,std::vector& params,Client::PatternMgr *mgr)
{
- Testing::nbSamples_t nb=MAX_NB_SAMPLES;
-
switch(id)
{
diff --git a/Testing/Source/Tests/FastMathF32.cpp b/Testing/Source/Tests/FastMathF32.cpp
index 7c5cbd0a..d5ec8627 100755
--- a/Testing/Source/Tests/FastMathF32.cpp
+++ b/Testing/Source/Tests/FastMathF32.cpp
@@ -1,10 +1,10 @@
#include "FastMathF32.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_vec_math.h"
#include "Test.h"
-#include
#define SNR_THRESHOLD 120
/*
diff --git a/Testing/Source/Tests/FastMathQ15.cpp b/Testing/Source/Tests/FastMathQ15.cpp
index 270dfd00..977faaa7 100755
--- a/Testing/Source/Tests/FastMathQ15.cpp
+++ b/Testing/Source/Tests/FastMathQ15.cpp
@@ -1,9 +1,9 @@
#include "FastMathQ15.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
#define SNR_THRESHOLD 70
/*
diff --git a/Testing/Source/Tests/FastMathQ31.cpp b/Testing/Source/Tests/FastMathQ31.cpp
index a9c160b3..0de59869 100755
--- a/Testing/Source/Tests/FastMathQ31.cpp
+++ b/Testing/Source/Tests/FastMathQ31.cpp
@@ -1,9 +1,9 @@
#include "FastMathQ31.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
#define SNR_THRESHOLD 100
/*
diff --git a/Testing/Source/Tests/FullyConnected.cpp b/Testing/Source/Tests/FullyConnected.cpp
index 70593c46..231d3181 100644
--- a/Testing/Source/Tests/FullyConnected.cpp
+++ b/Testing/Source/Tests/FullyConnected.cpp
@@ -1,10 +1,11 @@
#include "FullyConnected.h"
+#include
#include "Error.h"
#include "arm_nnfunctions.h"
#include "Test.h"
#include "stdio.h"
-#include
+
void printPattern(char *s,Client::AnyPattern pat)
{
diff --git a/Testing/Source/Tests/MISCF32.cpp b/Testing/Source/Tests/MISCF32.cpp
index b25d0207..2920b25b 100755
--- a/Testing/Source/Tests/MISCF32.cpp
+++ b/Testing/Source/Tests/MISCF32.cpp
@@ -1,11 +1,10 @@
#include "MISCF32.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_vec_math.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 120
/*
diff --git a/Testing/Source/Tests/MISCQ15.cpp b/Testing/Source/Tests/MISCQ15.cpp
index 6f6e0305..96e1b866 100755
--- a/Testing/Source/Tests/MISCQ15.cpp
+++ b/Testing/Source/Tests/MISCQ15.cpp
@@ -1,11 +1,10 @@
#include "MISCQ15.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_vec_math.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 70
/*
diff --git a/Testing/Source/Tests/MISCQ31.cpp b/Testing/Source/Tests/MISCQ31.cpp
index 9f538606..d56f4f3f 100755
--- a/Testing/Source/Tests/MISCQ31.cpp
+++ b/Testing/Source/Tests/MISCQ31.cpp
@@ -1,11 +1,10 @@
#include "MISCQ31.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_vec_math.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 100
/*
diff --git a/Testing/Source/Tests/MISCQ7.cpp b/Testing/Source/Tests/MISCQ7.cpp
index 11bf6558..6b496521 100755
--- a/Testing/Source/Tests/MISCQ7.cpp
+++ b/Testing/Source/Tests/MISCQ7.cpp
@@ -1,11 +1,10 @@
#include "MISCQ7.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_vec_math.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 15
/*
diff --git a/Testing/Source/Tests/NNSupport.cpp b/Testing/Source/Tests/NNSupport.cpp
index 550d96d9..4e87792f 100755
--- a/Testing/Source/Tests/NNSupport.cpp
+++ b/Testing/Source/Tests/NNSupport.cpp
@@ -1,10 +1,9 @@
#include "NNSupport.h"
+#include
#include "Error.h"
#include "arm_nnfunctions.h"
#include "Test.h"
-#include
-
void NNSupport::test_nn_elementwise_add_s8()
diff --git a/Testing/Source/Tests/Pooling.cpp b/Testing/Source/Tests/Pooling.cpp
index a1bd9f50..0b144efb 100755
--- a/Testing/Source/Tests/Pooling.cpp
+++ b/Testing/Source/Tests/Pooling.cpp
@@ -1,10 +1,9 @@
#include "Pooling.h"
+#include
#include "Error.h"
#include "arm_nnfunctions.h"
#include "Test.h"
-#include
-
void Pooling::test_avgpool_s8()
{
diff --git a/Testing/Source/Tests/SVMF32.cpp b/Testing/Source/Tests/SVMF32.cpp
index 939fe157..95dda2c3 100755
--- a/Testing/Source/Tests/SVMF32.cpp
+++ b/Testing/Source/Tests/SVMF32.cpp
@@ -1,4 +1,5 @@
#include "SVMF32.h"
+#include
#include "Error.h"
diff --git a/Testing/Source/Tests/Softmax.cpp b/Testing/Source/Tests/Softmax.cpp
index 10019334..a979ad24 100755
--- a/Testing/Source/Tests/Softmax.cpp
+++ b/Testing/Source/Tests/Softmax.cpp
@@ -1,10 +1,9 @@
#include "Softmax.h"
+#include
#include "Error.h"
#include "arm_nnfunctions.h"
#include "Test.h"
-#include
-
/*
Tests have shown that, compared to a float32 implementation
diff --git a/Testing/Source/Tests/StatsTestsF32.cpp b/Testing/Source/Tests/StatsTestsF32.cpp
index e1320095..7de2b932 100755
--- a/Testing/Source/Tests/StatsTestsF32.cpp
+++ b/Testing/Source/Tests/StatsTestsF32.cpp
@@ -1,9 +1,9 @@
#include "StatsTestsF32.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
#define SNR_THRESHOLD 120
/*
diff --git a/Testing/Source/Tests/StatsTestsQ15.cpp b/Testing/Source/Tests/StatsTestsQ15.cpp
index 3b697be8..b02dea32 100755
--- a/Testing/Source/Tests/StatsTestsQ15.cpp
+++ b/Testing/Source/Tests/StatsTestsQ15.cpp
@@ -1,5 +1,6 @@
#include "arm_math.h"
#include "StatsTestsQ15.h"
+#include
#include "Error.h"
#include "Test.h"
diff --git a/Testing/Source/Tests/StatsTestsQ31.cpp b/Testing/Source/Tests/StatsTestsQ31.cpp
index 0e105106..900aef3a 100755
--- a/Testing/Source/Tests/StatsTestsQ31.cpp
+++ b/Testing/Source/Tests/StatsTestsQ31.cpp
@@ -1,5 +1,6 @@
#include "arm_math.h"
#include "StatsTestsQ31.h"
+#include
#include "Error.h"
#include "Test.h"
diff --git a/Testing/Source/Tests/StatsTestsQ7.cpp b/Testing/Source/Tests/StatsTestsQ7.cpp
index f3f521dc..895a93a2 100755
--- a/Testing/Source/Tests/StatsTestsQ7.cpp
+++ b/Testing/Source/Tests/StatsTestsQ7.cpp
@@ -1,5 +1,6 @@
#include "arm_math.h"
#include "StatsTestsQ7.h"
+#include
#include "Error.h"
#include "Test.h"
diff --git a/Testing/Source/Tests/SupportBarTestsF32.cpp b/Testing/Source/Tests/SupportBarTestsF32.cpp
index f3914cdc..33bdb292 100755
--- a/Testing/Source/Tests/SupportBarTestsF32.cpp
+++ b/Testing/Source/Tests/SupportBarTestsF32.cpp
@@ -1,10 +1,9 @@
#include "SupportBarTestsF32.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
-
void SupportBarTestsF32::test_barycenter_f32()
{
diff --git a/Testing/Source/Tests/SupportTestsF32.cpp b/Testing/Source/Tests/SupportTestsF32.cpp
index 736d663b..58468372 100755
--- a/Testing/Source/Tests/SupportTestsF32.cpp
+++ b/Testing/Source/Tests/SupportTestsF32.cpp
@@ -1,10 +1,9 @@
#include "SupportTestsF32.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 120
#define REL_ERROR (1.0e-5)
#define ABS_Q15_ERROR ((q15_t)10)
diff --git a/Testing/Source/Tests/SupportTestsQ15.cpp b/Testing/Source/Tests/SupportTestsQ15.cpp
index 2610e838..baf7a40a 100755
--- a/Testing/Source/Tests/SupportTestsQ15.cpp
+++ b/Testing/Source/Tests/SupportTestsQ15.cpp
@@ -1,10 +1,9 @@
#include "SupportTestsQ15.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 120
#define REL_ERROR (1.0e-3)
#define ABS_Q15_ERROR ((q15_t)10)
diff --git a/Testing/Source/Tests/SupportTestsQ31.cpp b/Testing/Source/Tests/SupportTestsQ31.cpp
index d2e1cb20..db5c3ed6 100755
--- a/Testing/Source/Tests/SupportTestsQ31.cpp
+++ b/Testing/Source/Tests/SupportTestsQ31.cpp
@@ -1,10 +1,9 @@
#include "SupportTestsQ31.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 120
#define REL_ERROR (1.0e-5)
#define ABS_Q15_ERROR ((q15_t)10)
diff --git a/Testing/Source/Tests/SupportTestsQ7.cpp b/Testing/Source/Tests/SupportTestsQ7.cpp
index 47356e87..d84b27f8 100755
--- a/Testing/Source/Tests/SupportTestsQ7.cpp
+++ b/Testing/Source/Tests/SupportTestsQ7.cpp
@@ -1,10 +1,9 @@
#include "SupportTestsQ7.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 120
#define REL_ERROR (1.0e-5)
#define ABS_Q15_ERROR ((q15_t)(1<<8))
diff --git a/Testing/Source/Tests/TransformCF32.cpp b/Testing/Source/Tests/TransformCF32.cpp
index 0cc6ae7c..5c68d5cc 100755
--- a/Testing/Source/Tests/TransformCF32.cpp
+++ b/Testing/Source/Tests/TransformCF32.cpp
@@ -1,11 +1,10 @@
#include "TransformCF32.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_const_structs.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 120
void TransformCF32::test_cfft_f32()
diff --git a/Testing/Source/Tests/TransformCF64.cpp b/Testing/Source/Tests/TransformCF64.cpp
index 05c542b2..02b54f32 100755
--- a/Testing/Source/Tests/TransformCF64.cpp
+++ b/Testing/Source/Tests/TransformCF64.cpp
@@ -1,11 +1,10 @@
#include "TransformCF64.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_const_structs.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 250
void TransformCF64::test_cfft_f64()
diff --git a/Testing/Source/Tests/TransformQ15.cpp b/Testing/Source/Tests/TransformQ15.cpp
index ea1393fc..16d26014 100755
--- a/Testing/Source/Tests/TransformQ15.cpp
+++ b/Testing/Source/Tests/TransformQ15.cpp
@@ -1,11 +1,10 @@
#include "TransformQ15.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_const_structs.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 30
void TransformQ15::test_cfft_q15()
diff --git a/Testing/Source/Tests/TransformQ31.cpp b/Testing/Source/Tests/TransformQ31.cpp
index b61be559..a7c0e610 100755
--- a/Testing/Source/Tests/TransformQ31.cpp
+++ b/Testing/Source/Tests/TransformQ31.cpp
@@ -1,11 +1,10 @@
#include "TransformQ31.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_const_structs.h"
#include "Test.h"
-#include
-
#define SNR_THRESHOLD 90
void TransformQ31::test_cfft_q31()
diff --git a/Testing/Source/Tests/TransformRF32.cpp b/Testing/Source/Tests/TransformRF32.cpp
index 5a3d301a..3dbf2634 100755
--- a/Testing/Source/Tests/TransformRF32.cpp
+++ b/Testing/Source/Tests/TransformRF32.cpp
@@ -1,10 +1,10 @@
#include "TransformRF32.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_const_structs.h"
#include "Test.h"
-#include
#define SNR_THRESHOLD 120
diff --git a/Testing/Source/Tests/TransformRF64.cpp b/Testing/Source/Tests/TransformRF64.cpp
index cb19870f..bcadcbd3 100755
--- a/Testing/Source/Tests/TransformRF64.cpp
+++ b/Testing/Source/Tests/TransformRF64.cpp
@@ -1,10 +1,10 @@
#include "TransformRF64.h"
+#include
#include "Error.h"
#include "arm_math.h"
#include "arm_const_structs.h"
#include "Test.h"
-#include
#define SNR_THRESHOLD 250
diff --git a/Testing/Source/Tests/UnaryTestsF32.cpp b/Testing/Source/Tests/UnaryTestsF32.cpp
index 3494521b..d7224d37 100755
--- a/Testing/Source/Tests/UnaryTestsF32.cpp
+++ b/Testing/Source/Tests/UnaryTestsF32.cpp
@@ -1,4 +1,5 @@
#include "UnaryTestsF32.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 120
@@ -207,7 +208,7 @@ void UnaryTestsF32::test_mat_inverse_f32()
PREPAREDATA1(false);
status=arm_mat_inverse_f32(&this->in1,&this->out);
- //ASSERT_TRUE(status==ARM_MATH_SUCCESS);
+ ASSERT_TRUE(status==ARM_MATH_SUCCESS);
outp += (rows * columns);
inp1 += (rows * columns);
@@ -216,7 +217,7 @@ void UnaryTestsF32::test_mat_inverse_f32()
ASSERT_EMPTY_TAIL(output);
- //ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD_INV);
+ ASSERT_SNR(output,ref,(float32_t)SNR_THRESHOLD_INV);
ASSERT_CLOSE_ERROR(output,ref,ABS_ERROR_INV,REL_ERROR_INV);
diff --git a/Testing/Source/Tests/UnaryTestsF64.cpp b/Testing/Source/Tests/UnaryTestsF64.cpp
index df506c14..141c8321 100755
--- a/Testing/Source/Tests/UnaryTestsF64.cpp
+++ b/Testing/Source/Tests/UnaryTestsF64.cpp
@@ -1,5 +1,6 @@
#include "arm_math.h"
#include "UnaryTestsF64.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 120
diff --git a/Testing/Source/Tests/UnaryTestsQ15.cpp b/Testing/Source/Tests/UnaryTestsQ15.cpp
index 0ce730b1..e9a44de2 100755
--- a/Testing/Source/Tests/UnaryTestsQ15.cpp
+++ b/Testing/Source/Tests/UnaryTestsQ15.cpp
@@ -1,4 +1,5 @@
#include "UnaryTestsQ15.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 70
diff --git a/Testing/Source/Tests/UnaryTestsQ31.cpp b/Testing/Source/Tests/UnaryTestsQ31.cpp
index 23992786..adc06840 100755
--- a/Testing/Source/Tests/UnaryTestsQ31.cpp
+++ b/Testing/Source/Tests/UnaryTestsQ31.cpp
@@ -1,4 +1,5 @@
#include "UnaryTestsQ31.h"
+#include
#include "Error.h"
#define SNR_THRESHOLD 100
diff --git a/Toolchain/GCC.cmake b/Toolchain/GCC.cmake
index c2c96a4e..4fff3c1b 100644
--- a/Toolchain/GCC.cmake
+++ b/Toolchain/GCC.cmake
@@ -37,6 +37,11 @@ function(compilerSpecificCompileOptions PROJECTNAME)
target_compile_options(${PROJECTNAME} PUBLIC "-march=armv7e-m;-mfpu=fpv5-d16")
target_link_options(${PROJECTNAME} PUBLIC "-march=armv7e-m;-mfpu=fpv5-d16")
endif()
+
+ if (ARM_CPU STREQUAL "cortex-m0" )
+ target_compile_options(${PROJECTNAME} PUBLIC "-march=armv6-m")
+ target_link_options(${PROJECTNAME} PUBLIC "-march=armv6-m")
+ endif()
if (ARM_CPU STREQUAL "cortex-a9" )
@@ -113,14 +118,14 @@ function(toolchainSpecificLinkForCortexA PROJECTNAME ROOT CORE PLATFORMFOLDER)
# RTE Components
target_include_directories(${PROJECTNAME} PRIVATE ${ROOT}/CMSIS/DSP/Testing)
+ target_include_directories(${PROJECTNAME} PRIVATE ${PLATFORMFOLDER}/${CORE}/LinkScripts/GCC)
- # Using the mem file which is included.
- # Since meme file in same temp directory, it is found by linker
- # when processing the scatter file
+ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tempLink)
set(SCATTERFILE ${CMAKE_CURRENT_BINARY_DIR}/tempLink/lnk.ld)
preprocessScatter(${CORE} ${PLATFORMFOLDER} ${SCATTERFILE})
- target_include_directories(${PROJECTNAME} PRIVATE ${PLATFORMFOLDER}/${CORE}/LinkScripts/GCC)
+
+ set_target_properties(${PROJECTNAME} PROPERTIES LINK_DEPENDS "${SCATTERFILE}")
target_link_options(${PROJECTNAME} PRIVATE "--entry=Reset_Handler;-T${SCATTERFILE}")
endfunction()