CMSIS-DSP: Some improvements to build Helium version with gcc.

Helium f16 disabled by default with gcc since there are a few remaining build issues.
pull/19/head
Christophe Favergeon 5 years ago
parent f657d52568
commit 4288cf4fec

@ -63,19 +63,7 @@ __STATIC_FORCEINLINE float32_t vecAddAcrossF32Mve(float32x4_t in)
return acc; return acc;
} }
__STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
{
float16x8_t tmpVec;
_Float16 acc;
tmpVec = (float16x8_t) vrev32q_s16((int16x8_t) in);
in = vaddq_f16(tmpVec, in);
tmpVec = (float16x8_t) vrev64q_s32((int32x4_t) in);
in = vaddq_f16(tmpVec, in);
acc = (_Float16)vgetq_lane_f16(in, 0) + (_Float16)vgetq_lane_f16(in, 4);
return acc;
}
/* newton initial guess */ /* newton initial guess */
@ -103,7 +91,23 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
Definitions available for f16 datatype with HW acceleration only Definitions available for f16 datatype with HW acceleration only
***************************************/ ***************************************/
#if defined(ARM_FLOAT16_SUPPORTED)
#if defined (ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined (ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
__STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
{
float16x8_t tmpVec;
_Float16 acc;
tmpVec = (float16x8_t) vrev32q_s16((int16x8_t) in);
in = vaddq_f16(tmpVec, in);
tmpVec = (float16x8_t) vrev64q_s32((int32x4_t) in);
in = vaddq_f16(tmpVec, in);
acc = (_Float16)vgetq_lane_f16(in, 0) + (_Float16)vgetq_lane_f16(in, 4);
return acc;
}
__STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16( __STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16(
float16x8_t vecIn) float16x8_t vecIn)
{ {
@ -174,6 +178,7 @@ __STATIC_FORCEINLINE void mve_cmplx_sum_intra_vec_f16(
invSqrt = vmulq(tmp, xStart); \ invSqrt = vmulq(tmp, xStart); \
} }
#endif
#endif #endif
/*************************************** /***************************************

@ -63,6 +63,7 @@ won't be built.
#if defined(ARM_MATH_NEON) || (defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/ #if defined(ARM_MATH_NEON) || (defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/
#if defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_NEON_FLOAT16) #if defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_NEON_FLOAT16)
/** /**
* @brief 16-bit floating-point 128-bit vector data type * @brief 16-bit floating-point 128-bit vector data type
*/ */

@ -35,7 +35,7 @@ extern "C"
#if defined(ARM_FLOAT16_SUPPORTED) #if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
static const float16_t __logf_rng_f16=0.693147180f16; static const float16_t __logf_rng_f16=0.693147180f16;
@ -296,7 +296,7 @@ __STATIC_INLINE f16x8_t vtanhq_f16(
return tanh; return tanh;
} }
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)*/ #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)*/

@ -51,7 +51,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"

@ -30,7 +30,7 @@
#if defined(ARM_FLOAT16_SUPPORTED) #if defined(ARM_FLOAT16_SUPPORTED)
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
@ -5568,6 +5568,6 @@ float16_t rearranged_twiddle_stride3_4096_f16[2728]={
#endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */ #endif /* !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
#endif /* if defined(ARM_FLOAT16_SUPPORTED) */ #endif /* if defined(ARM_FLOAT16_SUPPORTED) */

@ -67,7 +67,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_cmplx_conj_f16( void arm_cmplx_conj_f16(
const float16_t * pSrc, const float16_t * pSrc,

@ -75,7 +75,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -68,7 +68,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -69,7 +69,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_cmplx_mag_squared_f16( void arm_cmplx_mag_squared_f16(
const float16_t * pSrc, const float16_t * pSrc,

@ -70,7 +70,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_cmplx_mult_cmplx_f16( void arm_cmplx_mult_cmplx_f16(
const float16_t * pSrcA, const float16_t * pSrcA,

@ -69,7 +69,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_cmplx_mult_real_f16( void arm_cmplx_mult_real_f16(
const float16_t * pSrcCmplx, const float16_t * pSrcCmplx,

@ -66,7 +66,7 @@
* @return distance * @return distance
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -63,7 +63,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"

@ -57,7 +57,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math.h" #include "arm_vec_math.h"

@ -56,7 +56,7 @@
* @return distance * @return distance
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math.h" #include "arm_vec_math.h"

@ -58,7 +58,7 @@
* @return distance * @return distance
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math.h" #include "arm_vec_math.h"

@ -48,7 +48,7 @@
@{ @{
*/ */
#if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE) #if !defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_AUTOVECTORIZE)
/// @private /// @private
__STATIC_INLINE float16_t rel_entr(float16_t x, float16_t y) __STATIC_INLINE float16_t rel_entr(float16_t x, float16_t y)
{ {
@ -57,7 +57,7 @@ __STATIC_INLINE float16_t rel_entr(float16_t x, float16_t y)
#endif #endif
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"

@ -59,7 +59,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"

@ -32,9 +32,7 @@
#include "arm_common_tables.h" #include "arm_common_tables.h"
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"
#endif
void arm_vexp_f16( void arm_vexp_f16(
@ -44,7 +42,7 @@ void arm_vexp_f16(
{ {
uint32_t blkCnt; uint32_t blkCnt;
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
f16x8_t src; f16x8_t src;
f16x8_t dst; f16x8_t dst;

@ -32,9 +32,7 @@
#include "arm_common_tables.h" #include "arm_common_tables.h"
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"
#endif
void arm_vinverse_f16( void arm_vinverse_f16(
const float16_t * pSrc, const float16_t * pSrc,
@ -43,7 +41,7 @@ void arm_vinverse_f16(
{ {
uint32_t blkCnt; uint32_t blkCnt;
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
f16x8_t src; f16x8_t src;
f16x8_t dst; f16x8_t dst;

@ -31,10 +31,7 @@
#if defined(ARM_FLOAT16_SUPPORTED) #if defined(ARM_FLOAT16_SUPPORTED)
#include "arm_common_tables.h" #include "arm_common_tables.h"
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"
#endif
void arm_vlog_f16( void arm_vlog_f16(
const float16_t * pSrc, const float16_t * pSrc,
@ -43,7 +40,7 @@ void arm_vlog_f16(
{ {
uint32_t blkCnt; uint32_t blkCnt;
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
f16x8_t src; f16x8_t src;
f16x8_t dst; f16x8_t dst;

@ -94,7 +94,7 @@ void arm_biquad_cascade_df1_init_f16(
S->pState = pState; S->pState = pState;
} }
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
static void generateCoefsFastBiquadF16(float16_t b0, float16_t b1, float16_t b2, float16_t a1, float16_t a2, static void generateCoefsFastBiquadF16(float16_t b0, float16_t b1, float16_t b2, float16_t a1, float16_t a2,
arm_biquad_mod_coef_f16 * newCoef) arm_biquad_mod_coef_f16 * newCoef)

@ -47,7 +47,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_biquad_cascade_df2T_f16( void arm_biquad_cascade_df2T_f16(
const arm_biquad_cascade_df2T_instance_f16 * S, const arm_biquad_cascade_df2T_instance_f16 * S,
const float16_t * pSrc, const float16_t * pSrc,

@ -94,7 +94,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_filtering.h" #include "arm_vec_filtering.h"

@ -45,7 +45,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#define FIR_F32_MAX_COEF_BLK 8 #define FIR_F32_MAX_COEF_BLK 8

@ -84,7 +84,7 @@ void arm_fir_init_f16(
S->pCoeffs = pCoeffs; S->pCoeffs = pCoeffs;
/* Clear state buffer. The size is always (blockSize + numTaps - 1) */ /* Clear state buffer. The size is always (blockSize + numTaps - 1) */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
memset(pState, 0, (numTaps + (blockSize - 1U) + ROUND_UP(blockSize, 8)) * sizeof(float16_t)); memset(pState, 0, (numTaps + (blockSize - 1U) + ROUND_UP(blockSize, 8)) * sizeof(float16_t));
#else #else
memset(pState, 0, (numTaps + (blockSize - 1U)) * sizeof(float16_t)); memset(pState, 0, (numTaps + (blockSize - 1U)) * sizeof(float16_t));

@ -52,7 +52,7 @@
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_status arm_mat_add_f16( arm_status arm_mat_add_f16(
const arm_matrix_instance_f16 * pSrcA, const arm_matrix_instance_f16 * pSrcA,

@ -51,7 +51,7 @@
* The decomposition of A is returning a lower triangular matrix U such that A = U U^t * The decomposition of A is returning a lower triangular matrix U such that A = U U^t
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -50,7 +50,7 @@
- \ref ARM_MATH_SUCCESS : Operation successful - \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -171,16 +171,16 @@ arm_status arm_mat_cmplx_mult_q15(
pSrcAVec += 8; pSrcAVec += 8;
vecB = vldrhq_gather_shifted_offset(pInB, vecOffs); vecB = vldrhq_gather_shifted_offset(pInB, vecOffs);
acc0 = vmlsldavaq(acc0, vecA, vecB); acc0 = vmlsldavaq_s16(acc0, vecA, vecB);
acc1 = vmlaldavaxq(acc1, vecA, vecB); acc1 = vmlaldavaxq_s16(acc1, vecA, vecB);
vecB2 = vldrhq_gather_shifted_offset(pInB2, vecOffs); vecB2 = vldrhq_gather_shifted_offset(pInB2, vecOffs);
/* /*
* move Matrix B read offsets, 4 rows down * move Matrix B read offsets, 4 rows down
*/ */
vecOffs = vaddq(vecOffs, (uint16_t) (numColsB * 4 * CMPLX_DIM)); vecOffs = vaddq_n_u16(vecOffs, (uint16_t) (numColsB * 4 * CMPLX_DIM));
acc2 = vmlsldavaq(acc2, vecA, vecB2); acc2 = vmlsldavaq_s16(acc2, vecA, vecB2);
acc3 = vmlaldavaxq(acc3, vecA, vecB2); acc3 = vmlaldavaxq_s16(acc3, vecA, vecB2);
blkCnt--; blkCnt--;
} }
@ -196,17 +196,17 @@ arm_status arm_mat_cmplx_mult_q15(
vecA = vldrhq_z_s16(pSrcAVec, p0); vecA = vldrhq_z_s16(pSrcAVec, p0);
acc0 = vmlsldavaq(acc0, vecA, vecB); acc0 = vmlsldavaq_s16(acc0, vecA, vecB);
acc1 = vmlaldavaxq(acc1, vecA, vecB); acc1 = vmlaldavaxq_s16(acc1, vecA, vecB);
vecB2 = vldrhq_gather_shifted_offset(pInB2, vecOffs); vecB2 = vldrhq_gather_shifted_offset(pInB2, vecOffs);
/* /*
* move Matrix B read offsets, 4 rows down * move Matrix B read offsets, 4 rows down
*/ */
vecOffs = vaddq(vecOffs, (uint16_t) (numColsB * 4 * CMPLX_DIM)); vecOffs = vaddq_n_u16(vecOffs, (uint16_t) (numColsB * 4 * CMPLX_DIM));
acc2 = vmlsldavaq(acc2, vecA, vecB2); acc2 = vmlsldavaq_s16(acc2, vecA, vecB2);
acc3 = vmlaldavaxq(acc3, vecA, vecB2); acc3 = vmlaldavaxq_s16(acc3, vecA, vecB2);
} }
/* /*
@ -264,12 +264,12 @@ arm_status arm_mat_cmplx_mult_q15(
pSrcAVec += 8; pSrcAVec += 8;
vecB = vldrhq_gather_shifted_offset(pInB, vecOffs); vecB = vldrhq_gather_shifted_offset(pInB, vecOffs);
acc0 = vmlsldavaq(acc0, vecA, vecB); acc0 = vmlsldavaq_s16(acc0, vecA, vecB);
acc1 = vmlaldavaxq(acc1, vecA, vecB); acc1 = vmlaldavaxq_s16(acc1, vecA, vecB);
/* /*
* move Matrix B read offsets, 4 rows down * move Matrix B read offsets, 4 rows down
*/ */
vecOffs = vaddq(vecOffs, (uint16_t) (numColsB * 4 * CMPLX_DIM)); vecOffs = vaddq_n_u16(vecOffs, (uint16_t) (numColsB * 4 * CMPLX_DIM));
blkCnt--; blkCnt--;
} }
@ -284,8 +284,8 @@ arm_status arm_mat_cmplx_mult_q15(
vecB = vldrhq_gather_shifted_offset(pInB, vecOffs); vecB = vldrhq_gather_shifted_offset(pInB, vecOffs);
vecA = vldrhq_z_s16(pSrcAVec, p0); vecA = vldrhq_z_s16(pSrcAVec, p0);
acc0 = vmlsldavaq(acc0, vecA, vecB); acc0 = vmlsldavaq_s16(acc0, vecA, vecB);
acc1 = vmlaldavaxq(acc1, vecA, vecB); acc1 = vmlaldavaxq_s16(acc1, vecA, vecB);
} }
/* /*

@ -49,7 +49,7 @@
- \ref ARM_MATH_SUCCESS : Operation successful - \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -50,7 +50,7 @@
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
- \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible) - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible)
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_status arm_mat_inverse_f16( arm_status arm_mat_inverse_f16(
const arm_matrix_instance_f16 * pSrc, const arm_matrix_instance_f16 * pSrc,

@ -50,7 +50,7 @@
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
__STATIC_FORCEINLINE arm_status arm_mat_mult_f16_2x2_mve( __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_2x2_mve(
const arm_matrix_instance_f16 *pSrcA, const arm_matrix_instance_f16 *pSrcA,

@ -79,8 +79,8 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_2x2_mve(
vecA0 = vldrbq_s8(pInA0); vecA0 = vldrbq_s8(pInA0);
vecA1 = vldrbq_s8(pInA1); vecA1 = vldrbq_s8(pInA1);
acc0 = vmladavq(vecA0, vecB); acc0 = vmladavq_s8(vecA0, vecB);
acc1 = vmladavq(vecA1, vecB); acc1 = vmladavq_s8(vecA1, vecB);
pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8); pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8);
pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8); pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8);
@ -91,8 +91,8 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_2x2_mve(
vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0);
acc0 = vmladavq(vecA0, vecB); acc0 = vmladavq_s8(vecA0, vecB);
acc1 = vmladavq(vecA1, vecB); acc1 = vmladavq_s8(vecA1, vecB);
pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8); pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8);
pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8); pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8);
@ -108,7 +108,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_3x3_mve(
const arm_matrix_instance_q7 * pSrcB, const arm_matrix_instance_q7 * pSrcB,
arm_matrix_instance_q7 * pDst) arm_matrix_instance_q7 * pDst)
{ {
const uint32_t MATRIX_DIM = 3; const uint8_t MATRIX_DIM = 3;
q7_t const *pInB = (q7_t const *)pSrcB->pData; /* input data matrix pointer B */ q7_t const *pInB = (q7_t const *)pSrcB->pData; /* input data matrix pointer B */
q7_t *pInA = pSrcA->pData; /* input data matrix pointer A */ q7_t *pInA = pSrcA->pData; /* input data matrix pointer A */
q7_t *pOut = pDst->pData; /* output data matrix pointer */ q7_t *pOut = pDst->pData; /* output data matrix pointer */
@ -131,9 +131,9 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_3x3_mve(
vecA1 = vldrbq_s8(pInA1); vecA1 = vldrbq_s8(pInA1);
vecA2 = vldrbq_s8(pInA2); vecA2 = vldrbq_s8(pInA2);
acc0 = vmladavq(vecA0, vecB); acc0 = vmladavq_s8(vecA0, vecB);
acc1 = vmladavq(vecA1, vecB); acc1 = vmladavq_s8(vecA1, vecB);
acc2 = vmladavq(vecA2, vecB); acc2 = vmladavq_s8(vecA2, vecB);
pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8); pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8);
pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8); pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8);
@ -145,9 +145,9 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_3x3_mve(
vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0);
acc0 = vmladavq(vecA0, vecB); acc0 = vmladavq_s8(vecA0, vecB);
acc1 = vmladavq(vecA1, vecB); acc1 = vmladavq_s8(vecA1, vecB);
acc2 = vmladavq(vecA2, vecB); acc2 = vmladavq_s8(vecA2, vecB);
pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8); pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8);
pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8); pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8);
@ -159,9 +159,9 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_3x3_mve(
vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0);
acc0 = vmladavq(vecA0, vecB); acc0 = vmladavq_s8(vecA0, vecB);
acc1 = vmladavq(vecA1, vecB); acc1 = vmladavq_s8(vecA1, vecB);
acc2 = vmladavq(vecA2, vecB); acc2 = vmladavq_s8(vecA2, vecB);
pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8); pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8);
pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8); pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8);
@ -202,10 +202,10 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_4x4_mve(
vecA2 = vldrbq_s8(pInA2); vecA2 = vldrbq_s8(pInA2);
vecA3 = vldrbq_s8(pInA3); vecA3 = vldrbq_s8(pInA3);
acc0 = vmladavq(vecA0, vecB); acc0 = vmladavq_s8(vecA0, vecB);
acc1 = vmladavq(vecA1, vecB); acc1 = vmladavq_s8(vecA1, vecB);
acc2 = vmladavq(vecA2, vecB); acc2 = vmladavq_s8(vecA2, vecB);
acc3 = vmladavq(vecA3, vecB); acc3 = vmladavq_s8(vecA3, vecB);
pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8); pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8);
pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8); pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8);
@ -218,10 +218,10 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_4x4_mve(
vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0);
acc0 = vmladavq(vecA0, vecB); acc0 = vmladavq_s8(vecA0, vecB);
acc1 = vmladavq(vecA1, vecB); acc1 = vmladavq_s8(vecA1, vecB);
acc2 = vmladavq(vecA2, vecB); acc2 = vmladavq_s8(vecA2, vecB);
acc3 = vmladavq(vecA3, vecB); acc3 = vmladavq_s8(vecA3, vecB);
pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8); pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8);
pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8); pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8);
@ -234,10 +234,10 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_4x4_mve(
vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0);
acc0 = vmladavq(vecA0, vecB); acc0 = vmladavq_s8(vecA0, vecB);
acc1 = vmladavq(vecA1, vecB); acc1 = vmladavq_s8(vecA1, vecB);
acc2 = vmladavq(vecA2, vecB); acc2 = vmladavq_s8(vecA2, vecB);
acc3 = vmladavq(vecA3, vecB); acc3 = vmladavq_s8(vecA3, vecB);
pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8); pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8);
pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8); pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8);
@ -250,10 +250,10 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_4x4_mve(
vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0); vecB = vldrbq_gather_offset_z(pInB, vecColBOffs, p0);
acc0 = vmladavq(vecA0, vecB); acc0 = vmladavq_s8(vecA0, vecB);
acc1 = vmladavq(vecA1, vecB); acc1 = vmladavq_s8(vecA1, vecB);
acc2 = vmladavq(vecA2, vecB); acc2 = vmladavq_s8(vecA2, vecB);
acc3 = vmladavq(vecA3, vecB); acc3 = vmladavq_s8(vecA3, vecB);
pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8); pOut[0 * MATRIX_DIM] = (q7_t) __SSAT(acc0 >> 7, 8);
pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8); pOut[1 * MATRIX_DIM] = (q7_t) __SSAT(acc1 >> 7, 8);
@ -385,16 +385,16 @@ arm_status arm_mat_mult_q7(
{ {
vecB = vld1q(pSrcBVec); vecB = vld1q(pSrcBVec);
pSrcBVec += 16; pSrcBVec += 16;
acc0 = vmladavaq(acc0, vecA, vecB); acc0 = vmladavaq_s8(acc0, vecA, vecB);
vecA2 = vld1q(pSrcA2Vec); vecA2 = vld1q(pSrcA2Vec);
pSrcA2Vec += 16; pSrcA2Vec += 16;
acc1 = vmladavaq(acc1, vecA2, vecB); acc1 = vmladavaq_s8(acc1, vecA2, vecB);
vecB2 = vld1q(pSrcB2Vec); vecB2 = vld1q(pSrcB2Vec);
pSrcB2Vec += 16; pSrcB2Vec += 16;
acc2 = vmladavaq(acc2, vecA, vecB2); acc2 = vmladavaq_s8(acc2, vecA, vecB2);
vecA = vld1q(pSrcAVec); vecA = vld1q(pSrcAVec);
pSrcAVec += 16; pSrcAVec += 16;
acc3 = vmladavaq(acc3, vecA2, vecB2); acc3 = vmladavaq_s8(acc3, vecA2, vecB2);
blkCnt--; blkCnt--;
} }
@ -407,13 +407,13 @@ arm_status arm_mat_mult_q7(
{ {
mve_pred16_t p0 = vctp8q(blkCnt); mve_pred16_t p0 = vctp8q(blkCnt);
vecB = vld1q(pSrcBVec); vecB = vld1q(pSrcBVec);
acc0 = vmladavaq_p(acc0, vecA, vecB, p0); acc0 = vmladavaq_p_s8(acc0, vecA, vecB, p0);
vecA2 = vld1q(pSrcA2Vec); vecA2 = vld1q(pSrcA2Vec);
acc1 = vmladavaq_p(acc1, vecA2, vecB, p0); acc1 = vmladavaq_p_s8(acc1, vecA2, vecB, p0);
vecB2 = vld1q(pSrcB2Vec); vecB2 = vld1q(pSrcB2Vec);
acc2 = vmladavaq_p(acc2, vecA, vecB2, p0); acc2 = vmladavaq_p_s8(acc2, vecA, vecB2, p0);
vecA = vld1q(pSrcAVec); vecA = vld1q(pSrcAVec);
acc3 = vmladavaq_p(acc3, vecA2, vecB2, p0); acc3 = vmladavaq_p_s8(acc3, vecA2, vecB2, p0);
} }
*px++ = (q7_t) __SSAT(acc0 >> 7, 8); *px++ = (q7_t) __SSAT(acc0 >> 7, 8);
@ -473,7 +473,7 @@ arm_status arm_mat_mult_q7(
pSrcAVec += 16; pSrcAVec += 16;
vecB = vld1q(pSrcBVec); vecB = vld1q(pSrcBVec);
pSrcBVec += 16; pSrcBVec += 16;
acc0 = vmladavaq(acc0, vecA, vecB); acc0 = vmladavaq_s8(acc0, vecA, vecB);
blkCnt--; blkCnt--;
} }
@ -487,7 +487,7 @@ arm_status arm_mat_mult_q7(
mve_pred16_t p0 = vctp8q(blkCnt); mve_pred16_t p0 = vctp8q(blkCnt);
vecA = vld1q(pSrcAVec); vecA = vld1q(pSrcAVec);
vecB = vld1q(pSrcBVec); vecB = vld1q(pSrcBVec);
acc0 = vmladavaq_p(acc0, vecA, vecB, p0); acc0 = vmladavaq_p_s8(acc0, vecA, vecB, p0);
} }
*px = (q7_t) __SSAT(acc0 >> 7, 8); *px = (q7_t) __SSAT(acc0 >> 7, 8);
@ -539,7 +539,7 @@ arm_status arm_mat_mult_q7(
pSrcAVec += 16; pSrcAVec += 16;
vecB = vld1q(pSrcBVec); vecB = vld1q(pSrcBVec);
pSrcBVec += 16; pSrcBVec += 16;
acc0 = vmladavaq(acc0, vecA, vecB); acc0 = vmladavaq_s8(acc0, vecA, vecB);
blkCnt--; blkCnt--;
} }
@ -553,7 +553,7 @@ arm_status arm_mat_mult_q7(
mve_pred16_t p0 = vctp8q(blkCnt); mve_pred16_t p0 = vctp8q(blkCnt);
vecA = vld1q(pSrcAVec); vecA = vld1q(pSrcAVec);
vecB = vld1q(pSrcBVec); vecB = vld1q(pSrcBVec);
acc0 = vmladavaq_p(acc0, vecA, vecB, p0); acc0 = vmladavaq_p_s8(acc0, vecA, vecB, p0);
} }
*px++ = (q7_t) __SSAT(acc0 >> 7, 8); *px++ = (q7_t) __SSAT(acc0 >> 7, 8);

@ -50,7 +50,7 @@
- \ref ARM_MATH_SUCCESS : Operation successful - \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_status arm_mat_scale_f16( arm_status arm_mat_scale_f16(
const arm_matrix_instance_f16 * pSrc, const arm_matrix_instance_f16 * pSrc,

@ -46,7 +46,7 @@
* @return The function returns ARM_MATH_SINGULAR, if the system can't be solved. * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -48,7 +48,7 @@
* @return The function returns ARM_MATH_SINGULAR, if the system can't be solved. * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -50,7 +50,7 @@
- \ref ARM_MATH_SUCCESS : Operation successful - \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_status arm_mat_sub_f16( arm_status arm_mat_sub_f16(
const arm_matrix_instance_f16 * pSrcA, const arm_matrix_instance_f16 * pSrcA,

@ -48,7 +48,7 @@
- \ref ARM_MATH_SUCCESS : Operation successful - \ref ARM_MATH_SUCCESS : Operation successful
- \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -46,7 +46,7 @@
* @param[in] *pVec points to input vector * @param[in] *pVec points to input vector
* @param[out] *pDst points to output vector * @param[out] *pDst points to output vector
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -46,7 +46,7 @@
* @return none. * @return none.
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -47,7 +47,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"

@ -47,7 +47,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"

@ -47,7 +47,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"

@ -57,7 +57,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"

@ -64,7 +64,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"

@ -62,7 +62,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_math_f16.h" #include "arm_vec_math_f16.h"

@ -53,7 +53,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_f16( void arm_max_f16(
const float16_t * pSrc, const float16_t * pSrc,

@ -52,7 +52,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_no_idx_f16( void arm_max_no_idx_f16(
const float16_t *pSrc, const float16_t *pSrc,

@ -60,7 +60,7 @@
@param[out] pResult mean value returned here. @param[out] pResult mean value returned here.
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -54,7 +54,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_min_f16( void arm_min_f16(
const float16_t * pSrc, const float16_t * pSrc,

@ -49,7 +49,7 @@
@param[out] pResult sum of the squares value returned here @param[out] pResult sum of the squares value returned here
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -61,7 +61,7 @@
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_rms_f16( void arm_rms_f16(
const float16_t * pSrc, const float16_t * pSrc,

@ -49,7 +49,7 @@
@param[out] pResult variance value returned here @param[out] pResult variance value returned here
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -81,8 +81,8 @@ void arm_var_q15(
/* Compute Sum of squares of the input samples /* Compute Sum of squares of the input samples
* and then store the result in a temporary variable, sumOfSquares. */ * and then store the result in a temporary variable, sumOfSquares. */
sumOfSquares = vmlaldavaq(sumOfSquares, vecSrc, vecSrc); sumOfSquares = vmlaldavaq_s16(sumOfSquares, vecSrc, vecSrc);
sum = vaddvaq(sum, vecSrc); sum = vaddvaq_s16(sum, vecSrc);
blkCnt --; blkCnt --;
pSrc += 8; pSrc += 8;

@ -60,7 +60,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_barycenter_f16(const float16_t *in, void arm_barycenter_f16(const float16_t *in,
const float16_t *weights, const float16_t *weights,

@ -48,7 +48,7 @@
@param[in] blockSize number of samples in each vector @param[in] blockSize number of samples in each vector
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_copy_f16( void arm_copy_f16(
const float16_t * pSrc, const float16_t * pSrc,

@ -53,7 +53,7 @@
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_f16_to_float( void arm_f16_to_float(
const float16_t * pSrc, const float16_t * pSrc,

@ -62,7 +62,7 @@
defined in the preprocessor section of project options. defined in the preprocessor section of project options.
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_f16_to_q15( void arm_f16_to_q15(
const float16_t * pSrc, const float16_t * pSrc,

@ -48,7 +48,7 @@
@param[in] blockSize number of samples in each vector @param[in] blockSize number of samples in each vector
@return none @return none
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_fill_f16( void arm_fill_f16(
float16_t value, float16_t value,

@ -49,7 +49,7 @@
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_float_to_f16( void arm_float_to_f16(
const float32_t * pSrc, const float32_t * pSrc,

@ -58,7 +58,7 @@
</pre> </pre>
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_q15_to_f16( void arm_q15_to_f16(
const q15_t * pSrc, const q15_t * pSrc,

@ -59,7 +59,7 @@
* *
*/ */
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h" #include "arm_helium_utils.h"

@ -272,8 +272,8 @@ static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float1
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
/* load scheduling */ /* load scheduling */
vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64); vecA = (f16x8_t)vldrwq_gather_base_wb_f32(&vecScGathAddr, 64);
vecC = vldrwq_gather_base_f32(vecScGathAddr, 8); vecC = (f16x8_t)vldrwq_gather_base_f32(vecScGathAddr, 8);
blkCnt = (fftLen >> 4); blkCnt = (fftLen >> 4);
while (blkCnt > 0U) while (blkCnt > 0U)
@ -281,27 +281,27 @@ static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float1
vecSum0 = vecA + vecC; /* vecSum0 = vaddq(vecA, vecC) */ vecSum0 = vecA + vecC; /* vecSum0 = vaddq(vecA, vecC) */
vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */ vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */
vecB = vldrwq_gather_base_f32(vecScGathAddr, 4); vecB = (f16x8_t)vldrwq_gather_base_f32(vecScGathAddr, 4);
vecD = vldrwq_gather_base_f32(vecScGathAddr, 12); vecD = (f16x8_t)vldrwq_gather_base_f32(vecScGathAddr, 12);
vecSum1 = vecB + vecD; vecSum1 = vecB + vecD;
vecDiff1 = vecB - vecD; vecDiff1 = vecB - vecD;
/* pre-load for next iteration */ /* pre-load for next iteration */
vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64); vecA = (f16x8_t)vldrwq_gather_base_wb_f32(&vecScGathAddr, 64);
vecC = vldrwq_gather_base_f32(vecScGathAddr, 8); vecC = (f16x8_t)vldrwq_gather_base_f32(vecScGathAddr, 8);
vecTmp0 = vecSum0 + vecSum1; vecTmp0 = vecSum0 + vecSum1;
vstrwq_scatter_base_f32(vecScGathAddr, -64, vecTmp0); vstrwq_scatter_base_f32(vecScGathAddr, -64, (f32x4_t)vecTmp0);
vecTmp0 = vecSum0 - vecSum1; vecTmp0 = vecSum0 - vecSum1;
vstrwq_scatter_base_f32(vecScGathAddr, -64 + 4, vecTmp0); vstrwq_scatter_base_f32(vecScGathAddr, -64 + 4, (f32x4_t)vecTmp0);
vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1); vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_f32(vecScGathAddr, -64 + 8, vecTmp0); vstrwq_scatter_base_f32(vecScGathAddr, -64 + 8, (f32x4_t)vecTmp0);
vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1); vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1);
vstrwq_scatter_base_f32(vecScGathAddr, -64 + 12, vecTmp0); vstrwq_scatter_base_f32(vecScGathAddr, -64 + 12, (f32x4_t)vecTmp0);
blkCnt--; blkCnt--;
} }
@ -480,8 +480,8 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 *
/* /*
* load scheduling * load scheduling
*/ */
vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64); vecA = (f16x8_t)vldrwq_gather_base_wb_f32(&vecScGathAddr, 64);
vecC = vldrwq_gather_base_f32(vecScGathAddr, 8); vecC = (f16x8_t)vldrwq_gather_base_f32(vecScGathAddr, 8);
blkCnt = (fftLen >> 4); blkCnt = (fftLen >> 4);
while (blkCnt > 0U) while (blkCnt > 0U)
@ -489,30 +489,30 @@ static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 *
vecSum0 = vecA + vecC; /* vecSum0 = vaddq(vecA, vecC) */ vecSum0 = vecA + vecC; /* vecSum0 = vaddq(vecA, vecC) */
vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */ vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */
vecB = vldrwq_gather_base_f32(vecScGathAddr, 4); vecB = (f16x8_t)vldrwq_gather_base_f32(vecScGathAddr, 4);
vecD = vldrwq_gather_base_f32(vecScGathAddr, 12); vecD = (f16x8_t)vldrwq_gather_base_f32(vecScGathAddr, 12);
vecSum1 = vecB + vecD; vecSum1 = vecB + vecD;
vecDiff1 = vecB - vecD; vecDiff1 = vecB - vecD;
vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64); vecA = (f16x8_t)vldrwq_gather_base_wb_f32(&vecScGathAddr, 64);
vecC = vldrwq_gather_base_f32(vecScGathAddr, 8); vecC = (f16x8_t)vldrwq_gather_base_f32(vecScGathAddr, 8);
vecTmp0 = vecSum0 + vecSum1; vecTmp0 = vecSum0 + vecSum1;
vecTmp0 = vecTmp0 * onebyfftLen; vecTmp0 = vecTmp0 * onebyfftLen;
vstrwq_scatter_base_f32(vecScGathAddr, -64, vecTmp0); vstrwq_scatter_base_f32(vecScGathAddr, -64, (f32x4_t)vecTmp0);
vecTmp0 = vecSum0 - vecSum1; vecTmp0 = vecSum0 - vecSum1;
vecTmp0 = vecTmp0 * onebyfftLen; vecTmp0 = vecTmp0 * onebyfftLen;
vstrwq_scatter_base_f32(vecScGathAddr, -64 + 4, vecTmp0); vstrwq_scatter_base_f32(vecScGathAddr, -64 + 4, (f32x4_t)vecTmp0);
vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1); vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1);
vecTmp0 = vecTmp0 * onebyfftLen; vecTmp0 = vecTmp0 * onebyfftLen;
vstrwq_scatter_base_f32(vecScGathAddr, -64 + 8, vecTmp0); vstrwq_scatter_base_f32(vecScGathAddr, -64 + 8, (f32x4_t)vecTmp0);
vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1); vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1);
vecTmp0 = vecTmp0 * onebyfftLen; vecTmp0 = vecTmp0 * onebyfftLen;
vstrwq_scatter_base_f32(vecScGathAddr, -64 + 12, vecTmp0); vstrwq_scatter_base_f32(vecScGathAddr, -64 + 12, (f32x4_t)vecTmp0);
blkCnt--; blkCnt--;
} }

@ -30,7 +30,7 @@
#if defined(ARM_FLOAT16_SUPPORTED) #if defined(ARM_FLOAT16_SUPPORTED)
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void stage_rfft_f16( void stage_rfft_f16(
const arm_rfft_fast_instance_f16 * S, const arm_rfft_fast_instance_f16 * S,

@ -138,6 +138,12 @@ void arm_rfft_q15(
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_fft.h" #include "arm_vec_fft.h"
#if defined(__CMSIS_GCC_H)
#define MVE_CMPLX_MULT_FX_AxB_S16(A,B) vqdmladhxq_s16(vqdmlsdhq_s16((__typeof(A))vuninitializedq_s16(), A, B), A, B)
#define MVE_CMPLX_MULT_FX_AxConjB_S16(A,B) vqdmladhq_s16(vqdmlsdhxq_s16((__typeof(A))vuninitializedq_s16(), A, B), A, B)
#endif
void arm_split_rfft_q15( void arm_split_rfft_q15(
q15_t * pSrc, q15_t * pSrc,
uint32_t fftLen, uint32_t fftLen,
@ -157,7 +163,7 @@ void arm_split_rfft_q15(
0, 1, 0, 1, 0, 1, 0, 1 0, 1, 0, 1, 0, 1, 0, 1
}; };
offsetCoef = vmulq(vld1q(offsetCoefArr), modifier) + vld1q(offsetCoefArr + 8); offsetCoef = vmulq_n_u16(vld1q_u16(offsetCoefArr), modifier) + vld1q_u16(offsetCoefArr + 8);
offsetIn = vaddq_n_u16(offsetIn, (2 * fftLen - 8)); offsetIn = vaddq_n_u16(offsetIn, (2 * fftLen - 8));
/* Init coefficient pointers */ /* Init coefficient pointers */
@ -173,15 +179,19 @@ void arm_split_rfft_q15(
i = fftLen - 1U; i = fftLen - 1U;
i = i / 4 + 1; i = i / 4 + 1;
while (i > 0U) { while (i > 0U) {
q15x8_t in1 = vld1q(pIn1); q15x8_t in1 = vld1q_s16(pIn1);
q15x8_t in2 = vldrhq_gather_shifted_offset_s16(pSrc, offsetIn); q15x8_t in2 = vldrhq_gather_shifted_offset_s16(pSrc, offsetIn);
q15x8_t coefA = vldrhq_gather_shifted_offset_s16(pCoefAb, offsetCoef); q15x8_t coefA = vldrhq_gather_shifted_offset_s16(pCoefAb, offsetCoef);
q15x8_t coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef); q15x8_t coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef);
q15x8_t out = vhaddq(MVE_CMPLX_MULT_FX_AxB(in1, coefA), #if defined(__CMSIS_GCC_H)
q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB_S16(in1, coefA),
MVE_CMPLX_MULT_FX_AxConjB_S16(coefB, in2));
#else
q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB(in1, coefA),
MVE_CMPLX_MULT_FX_AxConjB(coefB, in2)); MVE_CMPLX_MULT_FX_AxConjB(coefB, in2));
#endif
vst1q(pOut1, out); vst1q_s16(pOut1, out);
pOut1 += 8; pOut1 += 8;
offsetCoef = vaddq_n_u16(offsetCoef, modifier * 8); offsetCoef = vaddq_n_u16(offsetCoef, modifier * 8);
@ -379,7 +389,7 @@ void arm_split_rifft_q15(
0, 1, 0, 1, 0, 1, 0, 1 0, 1, 0, 1, 0, 1, 0, 1
}; };
offsetCoef = vmulq(vld1q(offsetCoefArr), modifier) + vld1q(offsetCoefArr + 8); offsetCoef = vmulq_n_u16(vld1q_u16(offsetCoefArr), modifier) + vld1q_u16(offsetCoefArr + 8);
offset = vaddq_n_u16(offset, (2 * fftLen - 6)); offset = vaddq_n_u16(offset, (2 * fftLen - 6));
@ -397,16 +407,16 @@ void arm_split_rifft_q15(
i = i / 4; i = i / 4;
while (i > 0U) { while (i > 0U) {
q15x8_t in1 = vld1q(pIn1); q15x8_t in1 = vld1q_s16(pIn1);
q15x8_t in2 = vldrhq_gather_shifted_offset_s16(pSrc, offset); q15x8_t in2 = vldrhq_gather_shifted_offset_s16(pSrc, offset);
q15x8_t coefA = vldrhq_gather_shifted_offset_s16(pCoefAb, offsetCoef); q15x8_t coefA = vldrhq_gather_shifted_offset_s16(pCoefAb, offsetCoef);
q15x8_t coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef); q15x8_t coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef);
/* can we avoid the conjugate here ? */ /* can we avoid the conjugate here ? */
q15x8_t out = vhaddq(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA), q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA),
vmulq(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB))); vmulq(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB)));
vst1q(pDst, out); vst1q_s16(pDst, out);
pDst += 8; pDst += 8;
offsetCoef = vaddq_n_u16(offsetCoef, modifier * 8); offsetCoef = vaddq_n_u16(offsetCoef, modifier * 8);

@ -136,6 +136,13 @@ void arm_rfft_q31(
#include "arm_helium_utils.h" #include "arm_helium_utils.h"
#include "arm_vec_fft.h" #include "arm_vec_fft.h"
#if defined(__CMSIS_GCC_H)
#define MVE_CMPLX_MULT_FX_AxB_S32(A,B) vqdmladhxq_s32(vqdmlsdhq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
#define MVE_CMPLX_MULT_FX_AxConjB_S32(A,B) vqdmladhq_s32(vqdmlsdhxq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B)
#endif
void arm_split_rfft_q31( void arm_split_rfft_q31(
q31_t *pSrc, q31_t *pSrc,
uint32_t fftLen, uint32_t fftLen,
@ -167,13 +174,15 @@ void arm_split_rfft_q31(
i = fftLen - 1U; i = fftLen - 1U;
i = i / 2 + 1; i = i / 2 + 1;
while (i > 0U) { while (i > 0U) {
q31x4_t in1 = vld1q(pIn1); q31x4_t in1 = vld1q_s32(pIn1);
q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset); q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset);
q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef); q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef);
q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef); q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef);
#if defined(__CMSIS_GCC_H)
q31x4_t out = vhaddq(MVE_CMPLX_MULT_FX_AxB(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB(coefB, in2)); q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB_S32(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB_S32(coefB, in2));
#else
q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB(coefB, in2));
#endif
vst1q(pOut1, out); vst1q(pOut1, out);
pOut1 += 4; pOut1 += 4;
@ -321,16 +330,20 @@ void arm_split_rifft_q31(
i = fftLen; i = fftLen;
i = i >> 1; i = i >> 1;
while (i > 0U) { while (i > 0U) {
q31x4_t in1 = vld1q(pIn1); q31x4_t in1 = vld1q_s32(pIn1);
q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset); q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset);
q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef); q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef);
q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef); q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef);
/* can we avoid the conjugate here ? */ /* can we avoid the conjugate here ? */
q31x4_t out = vhaddq(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA), #if defined(__CMSIS_GCC_H)
vmulq(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB))); q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB_S32(in1, coefA),
vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB_S32(in2, coefB)));
vst1q(pDst, out); #else
q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA),
vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB)));
#endif
vst1q_s32(pDst, out);
pDst += 4; pDst += 4;
offsetCoef += modifier * 4; offsetCoef += modifier * 4;

@ -1,8 +1,8 @@
#include "dsp/filtering_functions_f16.h"
#include "Test.h" #include "Test.h"
#include "Pattern.h" #include "Pattern.h"
#include "dsp/filtering_functions_f16.h"
class MISCF16:public Client::Suite class MISCF16:public Client::Suite
{ {
public: public:

@ -1,7 +1,6 @@
#include "FastMathF16.h" #include "FastMathF16.h"
#include <stdio.h> #include <stdio.h>
#include "Error.h" #include "Error.h"
#include "arm_vec_math_f16.h"
#include "Test.h" #include "Test.h"

@ -1,7 +1,9 @@
#include "arm_vec_math.h"
#include "FastMathF32.h" #include "FastMathF32.h"
#include <stdio.h> #include <stdio.h>
#include "Error.h" #include "Error.h"
#include "arm_vec_math.h"
#include "Test.h" #include "Test.h"

@ -1,7 +1,6 @@
#include "MISCF16.h" #include "MISCF16.h"
#include <stdio.h> #include <stdio.h>
#include "Error.h" #include "Error.h"
#include "arm_vec_math.h"
#include "Test.h" #include "Test.h"
#define SNR_THRESHOLD 60 #define SNR_THRESHOLD 60

@ -1,7 +1,8 @@
#include "arm_vec_math.h"
#include "MISCF32.h" #include "MISCF32.h"
#include <stdio.h> #include <stdio.h>
#include "Error.h" #include "Error.h"
#include "arm_vec_math.h"
#include "Test.h" #include "Test.h"
#define SNR_THRESHOLD 120 #define SNR_THRESHOLD 120

@ -1,7 +1,8 @@
#include "arm_vec_math.h"
#include "MISCQ15.h" #include "MISCQ15.h"
#include <stdio.h> #include <stdio.h>
#include "Error.h" #include "Error.h"
#include "arm_vec_math.h"
#include "Test.h" #include "Test.h"
#define SNR_THRESHOLD 70 #define SNR_THRESHOLD 70

@ -1,7 +1,8 @@
#include "arm_vec_math.h"
#include "MISCQ31.h" #include "MISCQ31.h"
#include <stdio.h> #include <stdio.h>
#include "Error.h" #include "Error.h"
#include "arm_vec_math.h"
#include "Test.h" #include "Test.h"
#define SNR_THRESHOLD 100 #define SNR_THRESHOLD 100

@ -1,7 +1,8 @@
#include "arm_vec_math.h"
#include "MISCQ7.h" #include "MISCQ7.h"
#include <stdio.h> #include <stdio.h>
#include "Error.h" #include "Error.h"
#include "arm_vec_math.h"
#include "Test.h" #include "Test.h"
#define SNR_THRESHOLD 15 #define SNR_THRESHOLD 15

Loading…
Cancel
Save