From 0bcb1384f27daf524deea05c508ad3bd722314fb Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Thu, 24 Sep 2020 15:15:13 +0200 Subject: [PATCH] CMSIS-DSP: More consistent use of the flag ARM_MATH_AUTOVECTORIZE --- Include/arm_common_tables.h | 6 +++--- Include/arm_helium_utils.h | 10 +++++----- Include/arm_math_types.h | 4 ++-- Include/arm_math_types_f16.h | 2 +- Include/arm_mve_tables.h | 4 ++-- Include/arm_vec_math.h | 2 +- Include/dsp/transform_functions.h | 8 ++++---- Source/BasicMathFunctions/arm_abs_q15.c | 2 +- Source/BasicMathFunctions/arm_abs_q31.c | 2 +- Source/BasicMathFunctions/arm_abs_q7.c | 2 +- Source/BasicMathFunctions/arm_add_q15.c | 2 +- Source/BasicMathFunctions/arm_add_q31.c | 2 +- Source/BasicMathFunctions/arm_add_q7.c | 2 +- Source/BasicMathFunctions/arm_dot_prod_q15.c | 2 +- Source/BasicMathFunctions/arm_dot_prod_q31.c | 2 +- Source/BasicMathFunctions/arm_dot_prod_q7.c | 2 +- Source/BasicMathFunctions/arm_mult_q15.c | 2 +- Source/BasicMathFunctions/arm_mult_q31.c | 2 +- Source/BasicMathFunctions/arm_mult_q7.c | 2 +- Source/BasicMathFunctions/arm_negate_q15.c | 2 +- Source/BasicMathFunctions/arm_negate_q31.c | 2 +- Source/BasicMathFunctions/arm_negate_q7.c | 2 +- Source/BasicMathFunctions/arm_offset_q15.c | 2 +- Source/BasicMathFunctions/arm_offset_q31.c | 2 +- Source/BasicMathFunctions/arm_offset_q7.c | 2 +- Source/BasicMathFunctions/arm_scale_q15.c | 2 +- Source/BasicMathFunctions/arm_scale_q31.c | 2 +- Source/BasicMathFunctions/arm_scale_q7.c | 2 +- Source/BasicMathFunctions/arm_shift_q15.c | 2 +- Source/BasicMathFunctions/arm_shift_q31.c | 2 +- Source/BasicMathFunctions/arm_shift_q7.c | 2 +- Source/BasicMathFunctions/arm_sub_q15.c | 2 +- Source/BasicMathFunctions/arm_sub_q31.c | 2 +- Source/BasicMathFunctions/arm_sub_q7.c | 2 +- Source/CommonTables/arm_common_tables.c | 4 ++-- Source/CommonTables/arm_const_structs.c | 4 ++-- Source/CommonTables/arm_mve_tables.c | 4 ++-- .../ComplexMathFunctions.c | 2 +- 
.../ComplexMathFunctions/arm_cmplx_conj_q15.c | 2 +- .../ComplexMathFunctions/arm_cmplx_conj_q31.c | 2 +- .../arm_cmplx_dot_prod_q15.c | 2 +- .../arm_cmplx_dot_prod_q31.c | 2 +- .../ComplexMathFunctions/arm_cmplx_mag_q15.c | 2 +- .../ComplexMathFunctions/arm_cmplx_mag_q31.c | 2 +- .../arm_cmplx_mag_squared_q15.c | 2 +- .../arm_cmplx_mag_squared_q31.c | 2 +- .../arm_cmplx_mult_cmplx_q15.c | 2 +- .../arm_cmplx_mult_cmplx_q31.c | 2 +- .../arm_cmplx_mult_real_q15.c | 2 +- .../arm_cmplx_mult_real_q31.c | 2 +- .../arm_boolean_distance_template.h | 2 +- .../arm_biquad_cascade_df1_32x64_q31.c | 2 +- .../arm_biquad_cascade_df1_q15.c | 2 +- .../arm_biquad_cascade_df1_q31.c | 2 +- Source/FilteringFunctions/arm_conv_q15.c | 2 +- Source/FilteringFunctions/arm_conv_q31.c | 2 +- Source/FilteringFunctions/arm_conv_q7.c | 2 +- Source/FilteringFunctions/arm_correlate_q15.c | 2 +- Source/FilteringFunctions/arm_correlate_q31.c | 2 +- Source/FilteringFunctions/arm_correlate_q7.c | 7 +++++-- .../FilteringFunctions/arm_fir_decimate_q15.c | 2 +- .../FilteringFunctions/arm_fir_decimate_q31.c | 2 +- .../arm_fir_interpolate_q15.c | 2 +- .../arm_fir_interpolate_q31.c | 2 +- Source/FilteringFunctions/arm_fir_q15.c | 2 +- Source/FilteringFunctions/arm_fir_q31.c | 2 +- Source/FilteringFunctions/arm_fir_q7.c | 2 +- Source/MatrixFunctions/arm_mat_add_q15.c | 2 +- Source/MatrixFunctions/arm_mat_add_q31.c | 2 +- .../MatrixFunctions/arm_mat_cmplx_mult_q15.c | 2 +- .../MatrixFunctions/arm_mat_cmplx_mult_q31.c | 2 +- .../MatrixFunctions/arm_mat_cmplx_trans_q15.c | 2 +- .../MatrixFunctions/arm_mat_cmplx_trans_q31.c | 2 +- Source/MatrixFunctions/arm_mat_mult_q15.c | 2 +- Source/MatrixFunctions/arm_mat_mult_q31.c | 2 +- Source/MatrixFunctions/arm_mat_mult_q7.c | 2 +- Source/MatrixFunctions/arm_mat_scale_q15.c | 2 +- Source/MatrixFunctions/arm_mat_scale_q31.c | 2 +- Source/MatrixFunctions/arm_mat_sub_q15.c | 2 +- Source/MatrixFunctions/arm_mat_sub_q31.c | 2 +- Source/MatrixFunctions/arm_mat_trans_q15.c | 2 
+- Source/MatrixFunctions/arm_mat_trans_q31.c | 2 +- Source/MatrixFunctions/arm_mat_trans_q7.c | 2 +- Source/MatrixFunctions/arm_mat_vec_mult_f32.c | 8 +++++++- Source/MatrixFunctions/arm_mat_vec_mult_q15.c | 2 +- Source/MatrixFunctions/arm_mat_vec_mult_q31.c | 2 +- Source/MatrixFunctions/arm_mat_vec_mult_q7.c | 9 ++++++++- Source/StatisticsFunctions/arm_max_q15.c | 2 +- Source/StatisticsFunctions/arm_max_q31.c | 2 +- Source/StatisticsFunctions/arm_max_q7.c | 2 +- Source/StatisticsFunctions/arm_mean_q15.c | 2 +- Source/StatisticsFunctions/arm_mean_q31.c | 2 +- Source/StatisticsFunctions/arm_mean_q7.c | 2 +- Source/StatisticsFunctions/arm_min_q15.c | 2 +- Source/StatisticsFunctions/arm_min_q31.c | 2 +- Source/StatisticsFunctions/arm_min_q7.c | 2 +- Source/StatisticsFunctions/arm_power_q15.c | 2 +- Source/StatisticsFunctions/arm_power_q31.c | 2 +- Source/StatisticsFunctions/arm_power_q7.c | 2 +- Source/StatisticsFunctions/arm_rms_q15.c | 2 +- Source/StatisticsFunctions/arm_rms_q31.c | 2 +- Source/StatisticsFunctions/arm_std_q15.c | 2 +- Source/StatisticsFunctions/arm_std_q31.c | 2 +- Source/StatisticsFunctions/arm_var_q15.c | 2 +- Source/StatisticsFunctions/arm_var_q31.c | 2 +- Source/SupportFunctions/arm_copy_q15.c | 2 +- Source/SupportFunctions/arm_copy_q31.c | 2 +- Source/SupportFunctions/arm_copy_q7.c | 2 +- Source/SupportFunctions/arm_fill_q15.c | 2 +- Source/SupportFunctions/arm_fill_q31.c | 2 +- Source/SupportFunctions/arm_fill_q7.c | 2 +- Source/SupportFunctions/arm_q15_to_q31.c | 2 +- Source/SupportFunctions/arm_q15_to_q7.c | 2 +- Source/SupportFunctions/arm_q31_to_q15.c | 2 +- Source/SupportFunctions/arm_q31_to_q7.c | 2 +- Source/SupportFunctions/arm_q7_to_q15.c | 2 +- Source/SupportFunctions/arm_q7_to_q31.c | 2 +- Source/SupportFunctions/arm_quick_sort_f32.c | 2 +- Source/TransformFunctions/arm_cfft_init_q15.c | 2 +- Source/TransformFunctions/arm_cfft_init_q31.c | 2 +- Source/TransformFunctions/arm_cfft_q15.c | 2 +- Source/TransformFunctions/arm_cfft_q31.c 
| 2 +- Source/TransformFunctions/arm_rfft_init_q15.c | 18 +++++++++--------- Source/TransformFunctions/arm_rfft_init_q31.c | 18 +++++++++--------- Source/TransformFunctions/arm_rfft_q15.c | 6 +++--- Source/TransformFunctions/arm_rfft_q31.c | 6 +++--- Testing/CMakeLists.txt | 4 ++++ Testing/Source/Tests/FIRQ15.cpp | 6 +++--- Testing/Source/Tests/FIRQ31.cpp | 6 +++--- Testing/Source/Tests/FIRQ7.cpp | 6 +++--- Testing/TestScripts/Regression/Commands.py | 6 +++--- Testing/TestScripts/doc/Format.py | 2 +- Testing/createDb.sql | 1 + Testing/runAllTests.py | 6 +++--- 134 files changed, 198 insertions(+), 177 deletions(-) diff --git a/Include/arm_common_tables.h b/Include/arm_common_tables.h index 40b351b4..6abb568f 100644 --- a/Include/arm_common_tables.h +++ b/Include/arm_common_tables.h @@ -498,13 +498,13 @@ extern "C" extern const q15_t sinTable_q15[FAST_MATH_TABLE_SIZE + 1]; #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE) extern const q31_t sqrtTable_Q31[256]; #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ #endif - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q15_MVE) extern const q15_t sqrtTable_Q15[256]; #endif /* !defined(ARM_DSP_CONFIG_TABLES) defined(ARM_ALL_FAST_TABLES) */ @@ -517,7 +517,7 @@ extern "C" extern const float32_t __logf_lut_f32[8]; #endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */ -#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) +#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) extern const unsigned char hwLUT[256]; #endif /* (defined(ARM_MATH_MVEI) || 
defined(ARM_MATH_HELIUM)) */ diff --git a/Include/arm_helium_utils.h b/Include/arm_helium_utils.h index 915ad7ca..ae207fdc 100755 --- a/Include/arm_helium_utils.h +++ b/Include/arm_helium_utils.h @@ -39,7 +39,7 @@ extern "C" Definitions available for MVEF and MVEI ***************************************/ -#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) +#if (defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI)) && !defined(ARM_MATH_AUTOVECTORIZE) #define INACTIVELANE 0 /* inactive lane content */ @@ -51,7 +51,7 @@ Definitions available for MVEF and MVEI Definitions available for MVEF only ***************************************/ -#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) +#if (defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) __STATIC_FORCEINLINE float32_t vecAddAcrossF32Mve(float32x4_t in) { @@ -103,7 +103,7 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in) Definitions available for f16 datatype with HW acceleration only ***************************************/ -#if defined (ARM_MATH_MVE_FLOAT16) +#if defined (ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) __STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16( float16x8_t vecIn) { @@ -181,7 +181,7 @@ __STATIC_FORCEINLINE void mve_cmplx_sum_intra_vec_f16( Definitions available for MVEI and MVEF only ***************************************/ -#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) +#if (defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI)) && !defined(ARM_MATH_AUTOVECTORIZE) /* Following functions are used to transpose matrix in f32 and q31 cases */ __STATIC_INLINE arm_status arm_mat_trans_32bit_2x2_mve( uint32_t * pDataSrc, @@ -596,7 +596,7 @@ __STATIC_INLINE arm_status arm_mat_cmplx_trans_16bit( Definitions available for MVEI only ***************************************/ -#if defined 
(ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI) +#if (defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI)) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_common_tables.h" diff --git a/Include/arm_math_types.h b/Include/arm_math_types.h index 95a17e35..8929807d 100755 --- a/Include/arm_math_types.h +++ b/Include/arm_math_types.h @@ -294,7 +294,7 @@ extern "C" /** * @brief vector types */ -#if defined(ARM_MATH_NEON) || defined (ARM_MATH_MVEI) +#if defined(ARM_MATH_NEON) || (defined (ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)) /** * @brief 64-bit fractional 128-bit vector data type in 1.63 format */ @@ -378,7 +378,7 @@ extern "C" #endif -#if defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF) /* floating point vector*/ +#if defined(ARM_MATH_NEON) || (defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/ /** * @brief 32-bit floating-point 128-bit vector type */ diff --git a/Include/arm_math_types_f16.h b/Include/arm_math_types_f16.h index a36c2c52..f43271dd 100755 --- a/Include/arm_math_types_f16.h +++ b/Include/arm_math_types_f16.h @@ -60,7 +60,7 @@ won't be built. 
#endif #endif -#if defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF) /* floating point vector*/ +#if defined(ARM_MATH_NEON) || (defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/ #if defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_NEON_FLOAT16) /** diff --git a/Include/arm_mve_tables.h b/Include/arm_mve_tables.h index 6cf8eafc..d75e1026 100755 --- a/Include/arm_mve_tables.h +++ b/Include/arm_mve_tables.h @@ -100,7 +100,7 @@ extern float32_t rearranged_twiddle_stride3_4096_f32[2728]; -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) @@ -161,7 +161,7 @@ extern q31_t rearranged_twiddle_stride3_4096_q31[2728]; -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) diff --git a/Include/arm_vec_math.h b/Include/arm_vec_math.h index e1f9b6ff..eb484706 100755 --- a/Include/arm_vec_math.h +++ b/Include/arm_vec_math.h @@ -295,7 +295,7 @@ __STATIC_INLINE f32x4_t vpowq_f32( #endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)*/ -#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) +#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) #endif /* (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) */ #if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) diff --git a/Include/dsp/transform_functions.h b/Include/dsp/transform_functions.h index b6e7284d..6e778948 100755 --- a/Include/dsp/transform_functions.h +++ b/Include/dsp/transform_functions.h @@ -217,7 +217,7 @@ extern "C" const q15_t *pTwiddle; /**< points to the Twiddle factor table. */ const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ uint16_t bitRevLength; /**< bit reversal table length. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) const uint32_t *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */ \ const uint32_t *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */ \ const uint32_t *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */ \ @@ -246,7 +246,7 @@ void arm_cfft_q15( const q31_t *pTwiddle; /**< points to the Twiddle factor table. */ const uint16_t *pBitRevTable; /**< points to the bit reversal table. */ uint16_t bitRevLength; /**< bit reversal table length. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) const uint32_t *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */ \ const uint32_t *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */ \ const uint32_t *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */ \ @@ -330,7 +330,7 @@ void arm_cfft_q31( uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ const q15_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ const q15_t *pTwiddleBReal; /**< points to the imag twiddle factor table. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) arm_cfft_instance_q15 cfftInst; #else const arm_cfft_instance_q15 *pCfft; /**< points to the complex FFT instance. */ @@ -359,7 +359,7 @@ void arm_cfft_q31( uint32_t twidCoefRModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */ const q31_t *pTwiddleAReal; /**< points to the real twiddle factor table. */ const q31_t *pTwiddleBReal; /**< points to the imag twiddle factor table. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) arm_cfft_instance_q31 cfftInst; #else const arm_cfft_instance_q31 *pCfft; /**< points to the complex FFT instance. */ diff --git a/Source/BasicMathFunctions/arm_abs_q15.c b/Source/BasicMathFunctions/arm_abs_q15.c index 971ddd36..439ce6fc 100644 --- a/Source/BasicMathFunctions/arm_abs_q15.c +++ b/Source/BasicMathFunctions/arm_abs_q15.c @@ -49,7 +49,7 @@ The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_abs_q31.c b/Source/BasicMathFunctions/arm_abs_q31.c index c32171fb..a2478689 100644 --- a/Source/BasicMathFunctions/arm_abs_q31.c +++ b/Source/BasicMathFunctions/arm_abs_q31.c @@ -49,7 +49,7 @@ The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_abs_q7.c b/Source/BasicMathFunctions/arm_abs_q7.c index e5ec87b7..a17b6c1a 100644 --- a/Source/BasicMathFunctions/arm_abs_q7.c +++ b/Source/BasicMathFunctions/arm_abs_q7.c @@ -51,7 +51,7 @@ The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_add_q15.c b/Source/BasicMathFunctions/arm_add_q15.c index c3d18816..0ac6541b 100644 --- a/Source/BasicMathFunctions/arm_add_q15.c +++ b/Source/BasicMathFunctions/arm_add_q15.c @@ -50,7 +50,7 @@ Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_add_q31.c b/Source/BasicMathFunctions/arm_add_q31.c index d9f0ea3a..8a0f33b0 100644 --- a/Source/BasicMathFunctions/arm_add_q31.c +++ b/Source/BasicMathFunctions/arm_add_q31.c @@ -50,7 +50,7 @@ Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_add_q7.c b/Source/BasicMathFunctions/arm_add_q7.c index 6342c2ea..34777ccc 100644 --- a/Source/BasicMathFunctions/arm_add_q7.c +++ b/Source/BasicMathFunctions/arm_add_q7.c @@ -51,7 +51,7 @@ Results outside of the allowable Q7 range [0x80 0x7F] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_dot_prod_q15.c b/Source/BasicMathFunctions/arm_dot_prod_q15.c index 65076898..ee86dc02 100644 --- a/Source/BasicMathFunctions/arm_dot_prod_q15.c +++ b/Source/BasicMathFunctions/arm_dot_prod_q15.c @@ -52,7 +52,7 @@ there is no risk of overflow. The return result is in 34.30 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_dot_prod_q31.c b/Source/BasicMathFunctions/arm_dot_prod_q31.c index f7bebb49..208a9851 100644 --- a/Source/BasicMathFunctions/arm_dot_prod_q31.c +++ b/Source/BasicMathFunctions/arm_dot_prod_q31.c @@ -54,7 +54,7 @@ The return result is in 16.48 format. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_dot_prod_q7.c b/Source/BasicMathFunctions/arm_dot_prod_q7.c index 2acbc760..e5c3f498 100644 --- a/Source/BasicMathFunctions/arm_dot_prod_q7.c +++ b/Source/BasicMathFunctions/arm_dot_prod_q7.c @@ -53,7 +53,7 @@ The return result is in 18.14 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_mult_q15.c b/Source/BasicMathFunctions/arm_mult_q15.c index 1985e72d..4c46fe92 100644 --- a/Source/BasicMathFunctions/arm_mult_q15.c +++ b/Source/BasicMathFunctions/arm_mult_q15.c @@ -49,7 +49,7 @@ The function uses saturating arithmetic. Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_mult_q31.c b/Source/BasicMathFunctions/arm_mult_q31.c index 437da243..3edf5f0f 100644 --- a/Source/BasicMathFunctions/arm_mult_q31.c +++ b/Source/BasicMathFunctions/arm_mult_q31.c @@ -49,7 +49,7 @@ The function uses saturating arithmetic. Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_mult_q7.c b/Source/BasicMathFunctions/arm_mult_q7.c index 1c0d5191..29eb9e71 100644 --- a/Source/BasicMathFunctions/arm_mult_q7.c +++ b/Source/BasicMathFunctions/arm_mult_q7.c @@ -49,7 +49,7 @@ The function uses saturating arithmetic. Results outside of the allowable Q7 range [0x80 0x7F] are saturated. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_negate_q15.c b/Source/BasicMathFunctions/arm_negate_q15.c index e1c4fd58..9a3e7285 100644 --- a/Source/BasicMathFunctions/arm_negate_q15.c +++ b/Source/BasicMathFunctions/arm_negate_q15.c @@ -50,7 +50,7 @@ The function uses saturating arithmetic. The Q15 value -1 (0x8000) is saturated to the maximum allowable positive value 0x7FFF. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_negate_q31.c b/Source/BasicMathFunctions/arm_negate_q31.c index 1ac10fb8..9f86dc93 100644 --- a/Source/BasicMathFunctions/arm_negate_q31.c +++ b/Source/BasicMathFunctions/arm_negate_q31.c @@ -49,7 +49,7 @@ The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_negate_q7.c b/Source/BasicMathFunctions/arm_negate_q7.c index cdf9b340..4c155c9f 100644 --- a/Source/BasicMathFunctions/arm_negate_q7.c +++ b/Source/BasicMathFunctions/arm_negate_q7.c @@ -48,7 +48,7 @@ The function uses saturating arithmetic. The Q7 value -1 (0x80) is saturated to the maximum allowable positive value 0x7F. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_offset_q15.c b/Source/BasicMathFunctions/arm_offset_q15.c index 9e3689d1..900e9a0c 100644 --- a/Source/BasicMathFunctions/arm_offset_q15.c +++ b/Source/BasicMathFunctions/arm_offset_q15.c @@ -49,7 +49,7 @@ The function uses saturating arithmetic. Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_offset_q31.c b/Source/BasicMathFunctions/arm_offset_q31.c index 411c7c5d..e57a069d 100644 --- a/Source/BasicMathFunctions/arm_offset_q31.c +++ b/Source/BasicMathFunctions/arm_offset_q31.c @@ -50,7 +50,7 @@ Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_offset_q7.c b/Source/BasicMathFunctions/arm_offset_q7.c index 6f646976..861b297a 100644 --- a/Source/BasicMathFunctions/arm_offset_q7.c +++ b/Source/BasicMathFunctions/arm_offset_q7.c @@ -49,7 +49,7 @@ The function uses saturating arithmetic. Results outside of the allowable Q7 range [0x80 0x7F] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_scale_q15.c b/Source/BasicMathFunctions/arm_scale_q15.c index 1ba4e741..480cb818 100644 --- a/Source/BasicMathFunctions/arm_scale_q15.c +++ b/Source/BasicMathFunctions/arm_scale_q15.c @@ -51,7 +51,7 @@ These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_scale_q31.c b/Source/BasicMathFunctions/arm_scale_q31.c index 456d773c..d24a01de 100644 --- a/Source/BasicMathFunctions/arm_scale_q31.c +++ b/Source/BasicMathFunctions/arm_scale_q31.c @@ -51,7 +51,7 @@ These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_scale_q7.c b/Source/BasicMathFunctions/arm_scale_q7.c index f5893a82..bc276b50 100644 --- a/Source/BasicMathFunctions/arm_scale_q7.c +++ b/Source/BasicMathFunctions/arm_scale_q7.c @@ -51,7 +51,7 @@ These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_shift_q15.c b/Source/BasicMathFunctions/arm_shift_q15.c index 528c9b35..c3449de7 100644 --- a/Source/BasicMathFunctions/arm_shift_q15.c +++ b/Source/BasicMathFunctions/arm_shift_q15.c @@ -50,7 +50,7 @@ Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_shift_q31.c b/Source/BasicMathFunctions/arm_shift_q31.c index 4b15c864..c1775013 100644 --- a/Source/BasicMathFunctions/arm_shift_q31.c +++ b/Source/BasicMathFunctions/arm_shift_q31.c @@ -67,7 +67,7 @@ Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_shift_q7.c b/Source/BasicMathFunctions/arm_shift_q7.c index c4f03d5b..ac516689 100644 --- a/Source/BasicMathFunctions/arm_shift_q7.c +++ b/Source/BasicMathFunctions/arm_shift_q7.c @@ -52,7 +52,7 @@ Results outside of the allowable Q7 range [0x80 0x7F] are saturated. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_sub_q15.c b/Source/BasicMathFunctions/arm_sub_q15.c index 5a8e77fc..e3610919 100644 --- a/Source/BasicMathFunctions/arm_sub_q15.c +++ b/Source/BasicMathFunctions/arm_sub_q15.c @@ -50,7 +50,7 @@ Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_sub_q31.c b/Source/BasicMathFunctions/arm_sub_q31.c index 522aa696..8b5f85ac 100644 --- a/Source/BasicMathFunctions/arm_sub_q31.c +++ b/Source/BasicMathFunctions/arm_sub_q31.c @@ -50,7 +50,7 @@ Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/BasicMathFunctions/arm_sub_q7.c b/Source/BasicMathFunctions/arm_sub_q7.c index 86755b62..99962cc2 100644 --- a/Source/BasicMathFunctions/arm_sub_q7.c +++ b/Source/BasicMathFunctions/arm_sub_q7.c @@ -49,7 +49,7 @@ The function uses saturating arithmetic. Results outside of the allowable Q7 range [0x80 0x7F] will be saturated. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/CommonTables/arm_common_tables.c b/Source/CommonTables/arm_common_tables.c index 7717f368..1b5c8174 100644 --- a/Source/CommonTables/arm_common_tables.c +++ b/Source/CommonTables/arm_common_tables.c @@ -70383,7 +70383,7 @@ const q15_t sinTable_q15[FAST_MATH_TABLE_SIZE + 1] = { }; #endif /* defined(ARM_ALL_FAST_TABLES) */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q31_MVE) const q31_t sqrtTable_Q31[256] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @@ -70535,7 +70535,7 @@ const float32_t __logf_lut_f32[8] = { #endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */ -#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) +#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) /* haming weight LUT for bytes */ #define B2(n) n, n + 1, n + 1, n + 2 diff --git a/Source/CommonTables/arm_const_structs.c b/Source/CommonTables/arm_const_structs.c index ac6ee9f9..574255ed 100644 --- a/Source/CommonTables/arm_const_structs.c +++ b/Source/CommonTables/arm_const_structs.c @@ -154,7 +154,7 @@ const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096 = { /* Fixed-point structs */ -#if !defined(ARM_MATH_MVEI) +#if !defined(ARM_MATH_MVEI) || defined(ARM_MATH_AUTOVECTORIZE) /* @@ -417,7 +417,7 @@ const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len4096 = { /* Fixed-point structs */ /* q31_t */ -#if !defined(ARM_MATH_MVEI) +#if !defined(ARM_MATH_MVEI) || defined(ARM_MATH_AUTOVECTORIZE) /* diff --git a/Source/CommonTables/arm_mve_tables.c b/Source/CommonTables/arm_mve_tables.c index efdfecb2..b77fc488 100755 --- a/Source/CommonTables/arm_mve_tables.c +++ b/Source/CommonTables/arm_mve_tables.c @@ -3764,7 +3764,7 @@ 
float32_t rearranged_twiddle_stride3_4096_f32[2728]={ #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) @@ -5429,7 +5429,7 @@ q31_t rearranged_twiddle_stride3_4096_q31[2728]={ #endif /* defined(ARM_MATH_MVEI) */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) diff --git a/Source/ComplexMathFunctions/ComplexMathFunctions.c b/Source/ComplexMathFunctions/ComplexMathFunctions.c index 17f58cfa..ad9b63c0 100644 --- a/Source/ComplexMathFunctions/ComplexMathFunctions.c +++ b/Source/ComplexMathFunctions/ComplexMathFunctions.c @@ -34,7 +34,7 @@ #include "arm_cmplx_dot_prod_q31.c" #include "arm_cmplx_mag_f32.c" -#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI) +#if (defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEI)) && !defined(ARM_MATH_AUTOVECTORIZE) #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_FAST_SQRT_Q15_MVE) #include "arm_cmplx_mag_q15.c" #endif diff --git a/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c b/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c index a78429c8..9e51ee65 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c +++ b/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c @@ -50,7 +50,7 @@ */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_cmplx_conj_q15( const q15_t * pSrc, q15_t * pDst, diff --git a/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c b/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c index ba50394a..14702367 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c +++ b/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c @@ -49,7 +49,7 @@ The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_cmplx_conj_q31( const q31_t * pSrc, diff --git a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c index b4d930dc..84324412 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c +++ b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c @@ -54,7 +54,7 @@ The return results realResult and imagResult are in 8.24 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_cmplx_dot_prod_q15( const q15_t * pSrcA, const q15_t * pSrcB, diff --git a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c index 618349c1..61618ca9 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c +++ b/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c @@ -55,7 +55,7 @@ Input down scaling is not required. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_cmplx_dot_prod_q31( const q31_t * pSrcA, diff --git a/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c b/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c index 28cf7b91..c75bad87 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c @@ -47,7 +47,7 @@ @par Scaling and Overflow Behavior The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c b/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c index 5db9a9ec..1174e564 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c @@ -49,7 +49,7 @@ Input down scaling is not required. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c index a0ea4b19..325cbbc9 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c @@ -48,7 +48,7 @@ The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_cmplx_mag_squared_q15( const q15_t * pSrc, diff --git a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c index c3ffd016..d0ec128e 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c @@ -49,7 +49,7 @@ Input down scaling is not required. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_cmplx_mag_squared_q31( const q31_t * pSrc, diff --git a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c index 71f81497..69c1a457 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c @@ -49,7 +49,7 @@ The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_cmplx_mult_cmplx_q15( const q15_t * pSrcA, diff --git a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c index 94b90207..b2910786 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c @@ -50,7 +50,7 @@ Input down scaling is not required. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_cmplx_mult_cmplx_q31( const q31_t * pSrcA, const q31_t * pSrcB, diff --git a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c index 39a95c24..3097d634 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c @@ -49,7 +49,7 @@ The function uses saturating arithmetic. Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_cmplx_mult_real_q15( const q15_t * pSrcCmplx, diff --git a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c index 63546153..b566afb2 100644 --- a/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c +++ b/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c @@ -50,7 +50,7 @@ Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] are saturated. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_cmplx_mult_real_q31( const q31_t * pSrcCmplx, const q31_t * pSrcReal, diff --git a/Source/DistanceFunctions/arm_boolean_distance_template.h b/Source/DistanceFunctions/arm_boolean_distance_template.h index 414aca4b..f04d789f 100755 --- a/Source/DistanceFunctions/arm_boolean_distance_template.h +++ b/Source/DistanceFunctions/arm_boolean_distance_template.h @@ -62,7 +62,7 @@ * */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_common_tables.h" diff --git a/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c b/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c index dd44c7ab..386a10cc 100644 --- a/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c @@ -172,7 +172,7 @@ - \ref arm_biquad_cascade_df1_fast_q31() implements a Biquad cascade with 32-bit coefficients and state variables with a Q31 accumulator. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" void arm_biquad_cas_df1_32x64_q31( diff --git a/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c b/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c index 3485ebe7..1862b2bb 100644 --- a/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c @@ -56,7 +56,7 @@ Refer to \ref arm_biquad_cascade_df1_fast_q15() for a faster but less precise implementation of this filter. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_biquad_cascade_df1_q15( const arm_biquad_casd_df1_inst_q15 * S, diff --git a/Source/FilteringFunctions/arm_biquad_cascade_df1_q31.c b/Source/FilteringFunctions/arm_biquad_cascade_df1_q31.c index a77f338e..5584dc0f 100644 --- a/Source/FilteringFunctions/arm_biquad_cascade_df1_q31.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_df1_q31.c @@ -55,7 +55,7 @@ @remark Refer to \ref arm_biquad_cascade_df1_fast_q31() for a faster but less precise implementation of this filter. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_biquad_cascade_df1_q31( const arm_biquad_casd_df1_inst_q31 * S, diff --git a/Source/FilteringFunctions/arm_conv_q15.c b/Source/FilteringFunctions/arm_conv_q15.c index e9311051..410fa408 100644 --- a/Source/FilteringFunctions/arm_conv_q15.c +++ b/Source/FilteringFunctions/arm_conv_q15.c @@ -58,7 +58,7 @@ @remark Refer to \ref arm_conv_opt_q15() for a faster implementation of this function using scratch buffers. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" #include "arm_vec_filtering.h" diff --git a/Source/FilteringFunctions/arm_conv_q31.c b/Source/FilteringFunctions/arm_conv_q31.c index d3b13de5..435486f2 100644 --- a/Source/FilteringFunctions/arm_conv_q31.c +++ b/Source/FilteringFunctions/arm_conv_q31.c @@ -59,7 +59,7 @@ @remark Refer to \ref arm_conv_fast_q31() for a faster but less precise implementation of this function. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" #include "arm_vec_filtering.h" diff --git a/Source/FilteringFunctions/arm_conv_q7.c b/Source/FilteringFunctions/arm_conv_q7.c index 58a65056..8974bac0 100644 --- a/Source/FilteringFunctions/arm_conv_q7.c +++ b/Source/FilteringFunctions/arm_conv_q7.c @@ -55,7 +55,7 @@ @remark Refer to \ref arm_conv_opt_q7() for a faster implementation of this function. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" #include "arm_vec_filtering.h" diff --git a/Source/FilteringFunctions/arm_correlate_q15.c b/Source/FilteringFunctions/arm_correlate_q15.c index 63e9b879..270eda70 100644 --- a/Source/FilteringFunctions/arm_correlate_q15.c +++ b/Source/FilteringFunctions/arm_correlate_q15.c @@ -58,7 +58,7 @@ @remark Refer to \ref arm_correlate_opt_q15() for a faster implementation of this function using scratch buffers. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" #include "arm_vec_filtering.h" diff --git a/Source/FilteringFunctions/arm_correlate_q31.c b/Source/FilteringFunctions/arm_correlate_q31.c index 90425517..c6bb38ec 100644 --- a/Source/FilteringFunctions/arm_correlate_q31.c +++ b/Source/FilteringFunctions/arm_correlate_q31.c @@ -59,7 +59,7 @@ @remark Refer to \ref arm_correlate_fast_q31() for a faster but less precise implementation of this function. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" #include "arm_vec_filtering.h" void arm_correlate_q31( diff --git a/Source/FilteringFunctions/arm_correlate_q7.c b/Source/FilteringFunctions/arm_correlate_q7.c index 8d5420dc..42bc3449 100644 --- a/Source/FilteringFunctions/arm_correlate_q7.c +++ b/Source/FilteringFunctions/arm_correlate_q7.c @@ -56,7 +56,7 @@ @remark Refer to \ref arm_correlate_opt_q7() for a faster implementation of this function. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" #include "arm_vec_filtering.h" @@ -884,7 +884,10 @@ void arm_correlate_q7( k = count; #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - +/* Temporary fix for bug in clang */ +#if defined(ARM_MATH_MVEF) && defined(ARM_MATH_AUTOVECTORIZE) + #pragma clang loop vectorize(disable) +#endif while (k > 0U) { /* Perform the multiply-accumulate */ diff --git a/Source/FilteringFunctions/arm_fir_decimate_q15.c b/Source/FilteringFunctions/arm_fir_decimate_q15.c index fe7cc46d..55c49b14 100644 --- a/Source/FilteringFunctions/arm_fir_decimate_q15.c +++ b/Source/FilteringFunctions/arm_fir_decimate_q15.c @@ -57,7 +57,7 @@ Refer to \ref arm_fir_decimate_fast_q15() for a faster but less precise implementation of this function. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/FilteringFunctions/arm_fir_decimate_q31.c b/Source/FilteringFunctions/arm_fir_decimate_q31.c index 692b88e9..b82c9423 100644 --- a/Source/FilteringFunctions/arm_fir_decimate_q31.c +++ b/Source/FilteringFunctions/arm_fir_decimate_q31.c @@ -56,7 +56,7 @@ Refer to \ref arm_fir_decimate_fast_q31() for a faster but less precise implementation of this function. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/FilteringFunctions/arm_fir_interpolate_q15.c b/Source/FilteringFunctions/arm_fir_interpolate_q15.c index a58d5d89..a70f27dd 100644 --- a/Source/FilteringFunctions/arm_fir_interpolate_q15.c +++ b/Source/FilteringFunctions/arm_fir_interpolate_q15.c @@ -54,7 +54,7 @@ Lastly, the accumulator is saturated to yield a result in 1.15 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" void arm_fir_interpolate_q15( diff --git a/Source/FilteringFunctions/arm_fir_interpolate_q31.c b/Source/FilteringFunctions/arm_fir_interpolate_q31.c index 56983faf..7fe17144 100644 --- a/Source/FilteringFunctions/arm_fir_interpolate_q31.c +++ b/Source/FilteringFunctions/arm_fir_interpolate_q31.c @@ -54,7 +54,7 @@ After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" void arm_fir_interpolate_q31( diff --git a/Source/FilteringFunctions/arm_fir_q15.c b/Source/FilteringFunctions/arm_fir_q15.c index 42fa9f2b..208f67c3 100644 --- a/Source/FilteringFunctions/arm_fir_q15.c +++ b/Source/FilteringFunctions/arm_fir_q15.c @@ -56,7 +56,7 @@ @remark Refer to \ref arm_fir_fast_q15() for a faster but less precise implementation of this function. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #define MVE_ASRL_SAT16(acc, shift) ((sqrshrl_sat48(acc, -(32-shift)) >> 32) & 0xffffffff) diff --git a/Source/FilteringFunctions/arm_fir_q31.c b/Source/FilteringFunctions/arm_fir_q31.c index 5002325c..bf406350 100644 --- a/Source/FilteringFunctions/arm_fir_q31.c +++ b/Source/FilteringFunctions/arm_fir_q31.c @@ -55,7 +55,7 @@ @remark Refer to \ref arm_fir_fast_q31() for a faster but less precise implementation of this filter. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/FilteringFunctions/arm_fir_q7.c b/Source/FilteringFunctions/arm_fir_q7.c index 378051fe..bb2aaf5b 100644 --- a/Source/FilteringFunctions/arm_fir_q7.c +++ b/Source/FilteringFunctions/arm_fir_q7.c @@ -54,7 +54,7 @@ Finally, the result is truncated to 1.7 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_fir_q7_1_16_mve(const arm_fir_instance_q7 * S, const q7_t * pSrc, q7_t * pDst, uint32_t blockSize) { diff --git a/Source/MatrixFunctions/arm_mat_add_q15.c b/Source/MatrixFunctions/arm_mat_add_q15.c index 66fe3c83..73d3942f 100644 --- a/Source/MatrixFunctions/arm_mat_add_q15.c +++ b/Source/MatrixFunctions/arm_mat_add_q15.c @@ -50,7 +50,7 @@ The function uses saturating arithmetic. Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) arm_status arm_mat_add_q15( const arm_matrix_instance_q15 * pSrcA, diff --git a/Source/MatrixFunctions/arm_mat_add_q31.c b/Source/MatrixFunctions/arm_mat_add_q31.c index 60724da5..ed3f8a51 100644 --- a/Source/MatrixFunctions/arm_mat_add_q31.c +++ b/Source/MatrixFunctions/arm_mat_add_q31.c @@ -50,7 +50,7 @@ The function uses saturating arithmetic. 
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) arm_status arm_mat_add_q31( const arm_matrix_instance_q31 * pSrcA, const arm_matrix_instance_q31 * pSrcB, diff --git a/Source/MatrixFunctions/arm_mat_cmplx_mult_q15.c b/Source/MatrixFunctions/arm_mat_cmplx_mult_q15.c index 3e10ec77..449e4bd0 100644 --- a/Source/MatrixFunctions/arm_mat_cmplx_mult_q15.c +++ b/Source/MatrixFunctions/arm_mat_cmplx_mult_q15.c @@ -57,7 +57,7 @@ This approach provides 33 guard bits and there is no risk of overflow. The 34.30 result is then truncated to 34.15 format by discarding the low 15 bits and then saturated to 1.15 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #define MVE_ASRL_SAT16(acc, shift) ((sqrshrl_sat48(acc, -(32-shift)) >> 32) & 0xffffffff) diff --git a/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c b/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c index ad07c732..afd141ec 100644 --- a/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c +++ b/Source/MatrixFunctions/arm_mat_cmplx_mult_q31.c @@ -56,7 +56,7 @@ to avoid overflows, as a total of numColsA additions are performed internally. The 2.62 accumulator is right shifted by 31 bits and saturated to 1.31 format to yield the final result. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/MatrixFunctions/arm_mat_cmplx_trans_q15.c b/Source/MatrixFunctions/arm_mat_cmplx_trans_q15.c index 00058b71..3e2bae29 100755 --- a/Source/MatrixFunctions/arm_mat_cmplx_trans_q15.c +++ b/Source/MatrixFunctions/arm_mat_cmplx_trans_q15.c @@ -45,7 +45,7 @@ - \ref ARM_MATH_SUCCESS : Operation successful - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/MatrixFunctions/arm_mat_cmplx_trans_q31.c b/Source/MatrixFunctions/arm_mat_cmplx_trans_q31.c index 63422c5c..1fda260a 100755 --- a/Source/MatrixFunctions/arm_mat_cmplx_trans_q31.c +++ b/Source/MatrixFunctions/arm_mat_cmplx_trans_q31.c @@ -47,7 +47,7 @@ - \ref ARM_MATH_SUCCESS : Operation successful - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/MatrixFunctions/arm_mat_mult_q15.c b/Source/MatrixFunctions/arm_mat_mult_q15.c index c592be06..c67e2913 100644 --- a/Source/MatrixFunctions/arm_mat_mult_q15.c +++ b/Source/MatrixFunctions/arm_mat_mult_q15.c @@ -57,7 +57,7 @@ @par Refer to \ref arm_mat_mult_fast_q15() for a faster but less precise version of this function. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #define MVE_ASRL_SAT16(acc, shift) ((sqrshrl_sat48(acc, -(32-shift)) >> 32) & 0xffffffff) diff --git a/Source/MatrixFunctions/arm_mat_mult_q31.c b/Source/MatrixFunctions/arm_mat_mult_q31.c index 5d85ca6e..08f06d74 100644 --- a/Source/MatrixFunctions/arm_mat_mult_q31.c +++ b/Source/MatrixFunctions/arm_mat_mult_q31.c @@ -58,7 +58,7 @@ @remark Refer to \ref arm_mat_mult_fast_q31() for a faster but less precise implementation of this function. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #define MATRIX_DIM2 2 #define MATRIX_DIM3 3 diff --git a/Source/MatrixFunctions/arm_mat_mult_q7.c b/Source/MatrixFunctions/arm_mat_mult_q7.c index ade638f3..58708896 100755 --- a/Source/MatrixFunctions/arm_mat_mult_q7.c +++ b/Source/MatrixFunctions/arm_mat_mult_q7.c @@ -53,7 +53,7 @@ * * */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) __STATIC_FORCEINLINE arm_status arm_mat_mult_q7_2x2_mve( const arm_matrix_instance_q7 * pSrcA, const arm_matrix_instance_q7 * pSrcB, diff --git a/Source/MatrixFunctions/arm_mat_scale_q15.c b/Source/MatrixFunctions/arm_mat_scale_q15.c index 0303cb92..d4fa1bdd 100644 --- a/Source/MatrixFunctions/arm_mat_scale_q15.c +++ b/Source/MatrixFunctions/arm_mat_scale_q15.c @@ -51,7 +51,7 @@ The input data *pSrc and scaleFract are in 1.15 format. These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) arm_status arm_mat_scale_q15( const arm_matrix_instance_q15 * pSrc, q15_t scaleFract, diff --git a/Source/MatrixFunctions/arm_mat_scale_q31.c b/Source/MatrixFunctions/arm_mat_scale_q31.c index c7710f56..ca09a111 100644 --- a/Source/MatrixFunctions/arm_mat_scale_q31.c +++ b/Source/MatrixFunctions/arm_mat_scale_q31.c @@ -51,7 +51,7 @@ The input data *pSrc and scaleFract are in 1.31 format. These are multiplied to yield a 2.62 intermediate result which is shifted with saturation to 1.31 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) arm_status arm_mat_scale_q31( const arm_matrix_instance_q31 * pSrc, q31_t scaleFract, diff --git a/Source/MatrixFunctions/arm_mat_sub_q15.c b/Source/MatrixFunctions/arm_mat_sub_q15.c index 5740919a..1077196f 100644 --- a/Source/MatrixFunctions/arm_mat_sub_q15.c +++ b/Source/MatrixFunctions/arm_mat_sub_q15.c @@ -50,7 +50,7 @@ The function uses saturating arithmetic. Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) arm_status arm_mat_sub_q15( const arm_matrix_instance_q15 * pSrcA, diff --git a/Source/MatrixFunctions/arm_mat_sub_q31.c b/Source/MatrixFunctions/arm_mat_sub_q31.c index 510d486f..909ce9a2 100644 --- a/Source/MatrixFunctions/arm_mat_sub_q31.c +++ b/Source/MatrixFunctions/arm_mat_sub_q31.c @@ -50,7 +50,7 @@ The function uses saturating arithmetic. Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) arm_status arm_mat_sub_q31( const arm_matrix_instance_q31 * pSrcA, const arm_matrix_instance_q31 * pSrcB, diff --git a/Source/MatrixFunctions/arm_mat_trans_q15.c b/Source/MatrixFunctions/arm_mat_trans_q15.c index a46dd917..12c4adba 100644 --- a/Source/MatrixFunctions/arm_mat_trans_q15.c +++ b/Source/MatrixFunctions/arm_mat_trans_q15.c @@ -46,7 +46,7 @@ - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/MatrixFunctions/arm_mat_trans_q31.c b/Source/MatrixFunctions/arm_mat_trans_q31.c index edb8cc45..604d8a46 100644 --- a/Source/MatrixFunctions/arm_mat_trans_q31.c +++ b/Source/MatrixFunctions/arm_mat_trans_q31.c @@ -45,7 +45,7 @@ - \ref ARM_MATH_SUCCESS : Operation successful - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/MatrixFunctions/arm_mat_trans_q7.c b/Source/MatrixFunctions/arm_mat_trans_q7.c index 4918c2c5..ee841946 100755 --- a/Source/MatrixFunctions/arm_mat_trans_q7.c +++ b/Source/MatrixFunctions/arm_mat_trans_q7.c @@ -45,7 +45,7 @@ - \ref ARM_MATH_SUCCESS : Operation successful - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) arm_status arm_mat_trans_q7(const arm_matrix_instance_q7 *pSrc, arm_matrix_instance_q7 *pDst) { diff --git a/Source/MatrixFunctions/arm_mat_vec_mult_f32.c b/Source/MatrixFunctions/arm_mat_vec_mult_f32.c index d83d827a..664d4bd2 100755 --- a/Source/MatrixFunctions/arm_mat_vec_mult_f32.c +++ b/Source/MatrixFunctions/arm_mat_vec_mult_f32.c @@ -27,6 +27,7 @@ #include "dsp/matrix_functions.h" + /** * @ingroup groupMatrix */ @@ -283,6 +284,7 @@ 
void arm_mat_vec_mult_f32( } } #else + void arm_mat_vec_mult_f32(const arm_matrix_instance_f32 *pSrcMat, const float32_t *pVec, float32_t *pDst) { uint32_t numRows = pSrcMat->numRows; @@ -365,7 +367,6 @@ void arm_mat_vec_mult_f32(const arm_matrix_instance_f32 *pSrcMat, const float32_ pInA1 = pSrcA + i; colCnt = numCols >> 1; - while (colCnt > 0) { vecData = *(pInVec)++; vecData2 = *(pInVec)++; @@ -377,6 +378,11 @@ void arm_mat_vec_mult_f32(const arm_matrix_instance_f32 *pSrcMat, const float32_ } // process remainder of row colCnt = numCols & 1u; + +/* Temporary fix for bug in clang */ +#if defined(ARM_MATH_MVEF) && defined(ARM_MATH_AUTOVECTORIZE) + #pragma clang loop vectorize(disable) +#endif while (colCnt > 0) { sum += *pInA1++ * *pInVec++; colCnt--; diff --git a/Source/MatrixFunctions/arm_mat_vec_mult_q15.c b/Source/MatrixFunctions/arm_mat_vec_mult_q15.c index f236bde9..ac448323 100755 --- a/Source/MatrixFunctions/arm_mat_vec_mult_q15.c +++ b/Source/MatrixFunctions/arm_mat_vec_mult_q15.c @@ -44,7 +44,7 @@ * @param[in] *pVec points to input vector * @param[out] *pDst points to output vector */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/MatrixFunctions/arm_mat_vec_mult_q31.c b/Source/MatrixFunctions/arm_mat_vec_mult_q31.c index 49dfe666..6c911b46 100755 --- a/Source/MatrixFunctions/arm_mat_vec_mult_q31.c +++ b/Source/MatrixFunctions/arm_mat_vec_mult_q31.c @@ -44,7 +44,7 @@ * @param[in] *pVec points to the input vector * @param[out] *pDst points to the output vector */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_mat_vec_mult_q31( const arm_matrix_instance_q31 * pSrcMat, const q31_t *pSrcVec, diff --git a/Source/MatrixFunctions/arm_mat_vec_mult_q7.c b/Source/MatrixFunctions/arm_mat_vec_mult_q7.c index 821f8fce..0ae2ddf2 100755 --- a/Source/MatrixFunctions/arm_mat_vec_mult_q7.c +++ 
b/Source/MatrixFunctions/arm_mat_vec_mult_q7.c @@ -44,7 +44,7 @@ * @param[in] *pVec points to the input vector * @param[out] *pDst points to the output vector */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" @@ -295,6 +295,8 @@ void arm_mat_vec_mult_q7(const arm_matrix_instance_q7 *pSrcMat, const q7_t *pVec i = 0u; px = pDst; + + /* The following loop performs the dot-product of each row in pSrcA with the vector */ while (row > 0) { /* For every row wise process, the pInVec pointer is set @@ -318,6 +320,7 @@ void arm_mat_vec_mult_q7(const arm_matrix_instance_q7 *pSrcMat, const q7_t *pVec // Inner loop: matrix-vector multiplication + while (colCnt > 0u) { // Read 4 values from vector vecData = read_q7x4_ia ((q7_t **) &pInVec); @@ -350,7 +353,11 @@ void arm_mat_vec_mult_q7(const arm_matrix_instance_q7 *pSrcMat, const q7_t *pVec } /* process any remaining columns */ + colCnt = numCols & 3u; +#if defined(ARM_MATH_MVEI) && defined(ARM_MATH_AUTOVECTORIZE) + #pragma clang loop vectorize(disable) +#endif while (colCnt > 0) { vecData = *pInVec++; sum1 += *pInA1++ * vecData; diff --git a/Source/StatisticsFunctions/arm_max_q15.c b/Source/StatisticsFunctions/arm_max_q15.c index 4d724c3f..0ae049b4 100644 --- a/Source/StatisticsFunctions/arm_max_q15.c +++ b/Source/StatisticsFunctions/arm_max_q15.c @@ -45,7 +45,7 @@ @param[out] pIndex index of maximum value returned here @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/StatisticsFunctions/arm_max_q31.c b/Source/StatisticsFunctions/arm_max_q31.c index 73f78b34..9b4a7436 100644 --- a/Source/StatisticsFunctions/arm_max_q31.c +++ b/Source/StatisticsFunctions/arm_max_q31.c @@ -45,7 +45,7 @@ @param[out] pIndex index of maximum value returned here @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 
#include "arm_helium_utils.h" diff --git a/Source/StatisticsFunctions/arm_max_q7.c b/Source/StatisticsFunctions/arm_max_q7.c index 596aa142..1bb2a1f9 100644 --- a/Source/StatisticsFunctions/arm_max_q7.c +++ b/Source/StatisticsFunctions/arm_max_q7.c @@ -45,7 +45,7 @@ @param[out] pIndex index of maximum value returned here @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/StatisticsFunctions/arm_mean_q15.c b/Source/StatisticsFunctions/arm_mean_q15.c index 8df341f7..27e9cd73 100644 --- a/Source/StatisticsFunctions/arm_mean_q15.c +++ b/Source/StatisticsFunctions/arm_mean_q15.c @@ -53,7 +53,7 @@ Finally, the accumulator is truncated to yield a result of 1.15 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_mean_q15( const q15_t * pSrc, uint32_t blockSize, diff --git a/Source/StatisticsFunctions/arm_mean_q31.c b/Source/StatisticsFunctions/arm_mean_q31.c index 995a2482..f10a4d1b 100644 --- a/Source/StatisticsFunctions/arm_mean_q31.c +++ b/Source/StatisticsFunctions/arm_mean_q31.c @@ -52,7 +52,7 @@ full precision of intermediate result is preserved. Finally, the accumulator is truncated to yield a result of 1.31 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_mean_q31( const q31_t * pSrc, uint32_t blockSize, diff --git a/Source/StatisticsFunctions/arm_mean_q7.c b/Source/StatisticsFunctions/arm_mean_q7.c index f96ea179..78341f28 100644 --- a/Source/StatisticsFunctions/arm_mean_q7.c +++ b/Source/StatisticsFunctions/arm_mean_q7.c @@ -53,7 +53,7 @@ Finally, the accumulator is truncated to yield a result of 1.7 format. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_mean_q7( const q7_t * pSrc, diff --git a/Source/StatisticsFunctions/arm_min_q15.c b/Source/StatisticsFunctions/arm_min_q15.c index 90df0ede..f8ef0942 100644 --- a/Source/StatisticsFunctions/arm_min_q15.c +++ b/Source/StatisticsFunctions/arm_min_q15.c @@ -46,7 +46,7 @@ @param[out] pIndex index of minimum value returned here @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/StatisticsFunctions/arm_min_q31.c b/Source/StatisticsFunctions/arm_min_q31.c index e38f5b4a..2b0c8127 100644 --- a/Source/StatisticsFunctions/arm_min_q31.c +++ b/Source/StatisticsFunctions/arm_min_q31.c @@ -46,7 +46,7 @@ @param[out] pIndex index of minimum value returned here @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/StatisticsFunctions/arm_min_q7.c b/Source/StatisticsFunctions/arm_min_q7.c index dc53aab6..a107d390 100644 --- a/Source/StatisticsFunctions/arm_min_q7.c +++ b/Source/StatisticsFunctions/arm_min_q7.c @@ -46,7 +46,7 @@ @param[out] pIndex index of minimum value returned here @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/StatisticsFunctions/arm_power_q15.c b/Source/StatisticsFunctions/arm_power_q15.c index d5e72822..335c5d3a 100644 --- a/Source/StatisticsFunctions/arm_power_q15.c +++ b/Source/StatisticsFunctions/arm_power_q15.c @@ -53,7 +53,7 @@ full precision of the intermediate multiplication is preserved. Finally, the return result is in 34.30 format. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_power_q15( const q15_t * pSrc, diff --git a/Source/StatisticsFunctions/arm_power_q31.c b/Source/StatisticsFunctions/arm_power_q31.c index acadf5c3..707c960b 100644 --- a/Source/StatisticsFunctions/arm_power_q31.c +++ b/Source/StatisticsFunctions/arm_power_q31.c @@ -54,7 +54,7 @@ full precision of the intermediate multiplication is preserved. Finally, the return result is in 16.48 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_power_q31( const q31_t * pSrc, uint32_t blockSize, diff --git a/Source/StatisticsFunctions/arm_power_q7.c b/Source/StatisticsFunctions/arm_power_q7.c index 87037bc6..bdeeaf73 100644 --- a/Source/StatisticsFunctions/arm_power_q7.c +++ b/Source/StatisticsFunctions/arm_power_q7.c @@ -53,7 +53,7 @@ full precision of the intermediate multiplication is preserved. Finally, the return result is in 18.14 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_power_q7( const q7_t * pSrc, uint32_t blockSize, diff --git a/Source/StatisticsFunctions/arm_rms_q15.c b/Source/StatisticsFunctions/arm_rms_q15.c index 716aae0e..f784fc9b 100644 --- a/Source/StatisticsFunctions/arm_rms_q15.c +++ b/Source/StatisticsFunctions/arm_rms_q15.c @@ -54,7 +54,7 @@ Finally, the 34.30 result is truncated to 34.15 format by discarding the lower 15 bits, and then saturated to yield a result in 1.15 format. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_rms_q15( const q15_t * pSrc, uint32_t blockSize, diff --git a/Source/StatisticsFunctions/arm_rms_q31.c b/Source/StatisticsFunctions/arm_rms_q31.c index c029c748..588d7117 100644 --- a/Source/StatisticsFunctions/arm_rms_q31.c +++ b/Source/StatisticsFunctions/arm_rms_q31.c @@ -56,7 +56,7 @@ log2(blockSize) bits, as a total of blockSize additions are performed internally. Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_rms_q31( const q31_t * pSrc, diff --git a/Source/StatisticsFunctions/arm_std_q15.c b/Source/StatisticsFunctions/arm_std_q15.c index a14d84bb..3864eba5 100644 --- a/Source/StatisticsFunctions/arm_std_q15.c +++ b/Source/StatisticsFunctions/arm_std_q15.c @@ -54,7 +54,7 @@ Finally, the 34.30 result is truncated to 34.15 format by discarding the lower 15 bits, and then saturated to yield a result in 1.15 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_std_q15( const q15_t * pSrc, uint32_t blockSize, diff --git a/Source/StatisticsFunctions/arm_std_q31.c b/Source/StatisticsFunctions/arm_std_q31.c index eb3a0e80..fa610784 100644 --- a/Source/StatisticsFunctions/arm_std_q31.c +++ b/Source/StatisticsFunctions/arm_std_q31.c @@ -57,7 +57,7 @@ After division, internal variables should be Q18.46 Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_std_q31( const q31_t * pSrc, uint32_t blockSize, diff --git a/Source/StatisticsFunctions/arm_var_q15.c b/Source/StatisticsFunctions/arm_var_q15.c index e693a714..903fe93b 100644 --- a/Source/StatisticsFunctions/arm_var_q15.c +++ b/Source/StatisticsFunctions/arm_var_q15.c @@ -54,7 +54,7 @@ Finally, the 34.30 result is truncated to 34.15 format by discarding the lower 15 bits, and then saturated to yield a result in 1.15 format. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_var_q15( const q15_t * pSrc, uint32_t blockSize, diff --git a/Source/StatisticsFunctions/arm_var_q31.c b/Source/StatisticsFunctions/arm_var_q31.c index 4562809a..2a9b2225 100644 --- a/Source/StatisticsFunctions/arm_var_q31.c +++ b/Source/StatisticsFunctions/arm_var_q31.c @@ -57,7 +57,7 @@ After division, internal variables should be Q18.46 Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_var_q31( const q31_t * pSrc, uint32_t blockSize, diff --git a/Source/SupportFunctions/arm_copy_q15.c b/Source/SupportFunctions/arm_copy_q15.c index d9479318..750185fe 100644 --- a/Source/SupportFunctions/arm_copy_q15.c +++ b/Source/SupportFunctions/arm_copy_q15.c @@ -44,7 +44,7 @@ @param[in] blockSize number of samples in each vector @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_copy_q15( const q15_t * pSrc, q15_t * pDst, diff --git a/Source/SupportFunctions/arm_copy_q31.c b/Source/SupportFunctions/arm_copy_q31.c index 4bbf5691..6bc02d43 100644 --- a/Source/SupportFunctions/arm_copy_q31.c +++ b/Source/SupportFunctions/arm_copy_q31.c @@ -44,7 +44,7 @@ @param[in] blockSize number of samples in each vector @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_copy_q31( const q31_t * pSrc, q31_t * pDst, diff --git a/Source/SupportFunctions/arm_copy_q7.c b/Source/SupportFunctions/arm_copy_q7.c index 89977f0e..1190a8fb 100644 --- a/Source/SupportFunctions/arm_copy_q7.c +++ b/Source/SupportFunctions/arm_copy_q7.c @@ -44,7 +44,7 @@ @param[in] blockSize number of samples in each vector @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_copy_q7( const q7_t * pSrc, q7_t * pDst, diff --git a/Source/SupportFunctions/arm_fill_q15.c b/Source/SupportFunctions/arm_fill_q15.c index ae4b4507..5228a7f4 100644 --- a/Source/SupportFunctions/arm_fill_q15.c +++ b/Source/SupportFunctions/arm_fill_q15.c @@ -44,7 +44,7 @@ @param[in] blockSize number of samples in each vector @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_fill_q15( q15_t value, q15_t * pDst, diff --git a/Source/SupportFunctions/arm_fill_q31.c 
b/Source/SupportFunctions/arm_fill_q31.c index ce5dee34..2b333d55 100644 --- a/Source/SupportFunctions/arm_fill_q31.c +++ b/Source/SupportFunctions/arm_fill_q31.c @@ -44,7 +44,7 @@ @param[in] blockSize number of samples in each vector @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_fill_q31( q31_t value, q31_t * pDst, diff --git a/Source/SupportFunctions/arm_fill_q7.c b/Source/SupportFunctions/arm_fill_q7.c index 8861f8f6..2f3bbd39 100644 --- a/Source/SupportFunctions/arm_fill_q7.c +++ b/Source/SupportFunctions/arm_fill_q7.c @@ -44,7 +44,7 @@ @param[in] blockSize number of samples in each vector @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_fill_q7( q7_t value, q7_t * pDst, diff --git a/Source/SupportFunctions/arm_q15_to_q31.c b/Source/SupportFunctions/arm_q15_to_q31.c index 5f136d7a..25eac814 100644 --- a/Source/SupportFunctions/arm_q15_to_q31.c +++ b/Source/SupportFunctions/arm_q15_to_q31.c @@ -50,7 +50,7 @@ pDst[n] = (q31_t) pSrc[n] << 16; 0 <= n < blockSize. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_q15_to_q31( const q15_t * pSrc, q31_t * pDst, diff --git a/Source/SupportFunctions/arm_q15_to_q7.c b/Source/SupportFunctions/arm_q15_to_q7.c index deb31c3b..132e9a46 100644 --- a/Source/SupportFunctions/arm_q15_to_q7.c +++ b/Source/SupportFunctions/arm_q15_to_q7.c @@ -50,7 +50,7 @@ pDst[n] = (q7_t) pSrc[n] >> 8; 0 <= n < blockSize. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_q15_to_q7( const q15_t * pSrc, q7_t * pDst, diff --git a/Source/SupportFunctions/arm_q31_to_q15.c b/Source/SupportFunctions/arm_q31_to_q15.c index f7f9043e..c5d14751 100644 --- a/Source/SupportFunctions/arm_q31_to_q15.c +++ b/Source/SupportFunctions/arm_q31_to_q15.c @@ -50,7 +50,7 @@ pDst[n] = (q15_t) pSrc[n] >> 16; 0 <= n < blockSize. 
*/ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_q31_to_q15( const q31_t * pSrc, q15_t * pDst, diff --git a/Source/SupportFunctions/arm_q31_to_q7.c b/Source/SupportFunctions/arm_q31_to_q7.c index 83f03f88..a0c3be9f 100644 --- a/Source/SupportFunctions/arm_q31_to_q7.c +++ b/Source/SupportFunctions/arm_q31_to_q7.c @@ -50,7 +50,7 @@ pDst[n] = (q7_t) pSrc[n] >> 24; 0 <= n < blockSize. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_q31_to_q7( const q31_t * pSrc, q7_t * pDst, diff --git a/Source/SupportFunctions/arm_q7_to_q15.c b/Source/SupportFunctions/arm_q7_to_q15.c index c235883e..0ec17f45 100644 --- a/Source/SupportFunctions/arm_q7_to_q15.c +++ b/Source/SupportFunctions/arm_q7_to_q15.c @@ -51,7 +51,7 @@ */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_q7_to_q15( const q7_t * pSrc, q15_t * pDst, diff --git a/Source/SupportFunctions/arm_q7_to_q31.c b/Source/SupportFunctions/arm_q7_to_q31.c index 249e457d..0b91de0f 100644 --- a/Source/SupportFunctions/arm_q7_to_q31.c +++ b/Source/SupportFunctions/arm_q7_to_q31.c @@ -50,7 +50,7 @@ pDst[n] = (q31_t) pSrc[n] << 24; 0 <= n < blockSize. */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_q7_to_q31( const q7_t * pSrc, q31_t * pDst, diff --git a/Source/SupportFunctions/arm_quick_sort_f32.c b/Source/SupportFunctions/arm_quick_sort_f32.c index 6e7e350f..b2c9059a 100644 --- a/Source/SupportFunctions/arm_quick_sort_f32.c +++ b/Source/SupportFunctions/arm_quick_sort_f32.c @@ -144,7 +144,7 @@ static void arm_quick_sort_core_f32(float32_t *pSrc, int32_t first, int32_t last * @par * In this implementation the Hoare partition scheme has been * used [Hoare, C. A. R. (1 January 1962). "Quicksort". The Computer - * Journal. 5 (1): 10–16.] The first element has always been chosen + * Journal. 5 (1): 10-16.]
The first element has always been chosen * as the pivot. The partition algorithm guarantees that the returned * pivot is never placed outside the vector, since it is returned only * when the pointers crossed each other. In this way it isn't diff --git a/Source/TransformFunctions/arm_cfft_init_q15.c b/Source/TransformFunctions/arm_cfft_init_q15.c index 2b478f57..7dcce7fa 100755 --- a/Source/TransformFunctions/arm_cfft_init_q15.c +++ b/Source/TransformFunctions/arm_cfft_init_q15.c @@ -53,7 +53,7 @@ #include "arm_common_tables.h" #include "arm_const_structs.h" -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_vec_fft.h" #include "arm_mve_tables.h" diff --git a/Source/TransformFunctions/arm_cfft_init_q31.c b/Source/TransformFunctions/arm_cfft_init_q31.c index a68ee9d6..8f6820f4 100755 --- a/Source/TransformFunctions/arm_cfft_init_q31.c +++ b/Source/TransformFunctions/arm_cfft_init_q31.c @@ -53,7 +53,7 @@ #include "arm_common_tables.h" #include "arm_const_structs.h" -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_vec_fft.h" #include "arm_mve_tables.h" diff --git a/Source/TransformFunctions/arm_cfft_q15.c b/Source/TransformFunctions/arm_cfft_q15.c index 86c7e8a1..00503a6e 100644 --- a/Source/TransformFunctions/arm_cfft_q15.c +++ b/Source/TransformFunctions/arm_cfft_q15.c @@ -28,7 +28,7 @@ #include "dsp/transform_functions.h" -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_vec_fft.h" diff --git a/Source/TransformFunctions/arm_cfft_q31.c b/Source/TransformFunctions/arm_cfft_q31.c index 80a6d963..13c5d840 100644 --- a/Source/TransformFunctions/arm_cfft_q31.c +++ b/Source/TransformFunctions/arm_cfft_q31.c @@ -30,7 +30,7 @@ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_vec_fft.h" diff --git a/Source/TransformFunctions/arm_rfft_init_q15.c 
b/Source/TransformFunctions/arm_rfft_init_q15.c index e84fc6bf..a6882e04 100644 --- a/Source/TransformFunctions/arm_rfft_init_q15.c +++ b/Source/TransformFunctions/arm_rfft_init_q15.c @@ -100,7 +100,7 @@ arm_status arm_rfft_init_q15( case 8192U: S->twidCoefRModifier = 1U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q15(&(S->cfftInst),4096); if (status != ARM_MATH_SUCCESS) { @@ -115,7 +115,7 @@ arm_status arm_rfft_init_q15( case 4096U: S->twidCoefRModifier = 2U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q15(&(S->cfftInst),2048); if (status != ARM_MATH_SUCCESS) { @@ -130,7 +130,7 @@ arm_status arm_rfft_init_q15( case 2048U: S->twidCoefRModifier = 4U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q15(&(S->cfftInst),1024); if (status != ARM_MATH_SUCCESS) { @@ -145,7 +145,7 @@ arm_status arm_rfft_init_q15( case 1024U: S->twidCoefRModifier = 8U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q15(&(S->cfftInst),512); if (status != ARM_MATH_SUCCESS) { @@ -160,7 +160,7 @@ arm_status arm_rfft_init_q15( case 512U: S->twidCoefRModifier = 16U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q15(&(S->cfftInst),256); if (status != ARM_MATH_SUCCESS) { @@ -175,7 +175,7 @@ arm_status arm_rfft_init_q15( case 256U: S->twidCoefRModifier = 32U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q15(&(S->cfftInst),128); if (status != ARM_MATH_SUCCESS) { @@ -190,7 +190,7 @@ arm_status arm_rfft_init_q15( case 128U: S->twidCoefRModifier = 64U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q15(&(S->cfftInst),64); if 
(status != ARM_MATH_SUCCESS) { @@ -205,7 +205,7 @@ arm_status arm_rfft_init_q15( case 64U: S->twidCoefRModifier = 128U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q15(&(S->cfftInst),32); if (status != ARM_MATH_SUCCESS) { @@ -220,7 +220,7 @@ arm_status arm_rfft_init_q15( case 32U: S->twidCoefRModifier = 256U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q15(&(S->cfftInst),16); if (status != ARM_MATH_SUCCESS) { diff --git a/Source/TransformFunctions/arm_rfft_init_q31.c b/Source/TransformFunctions/arm_rfft_init_q31.c index 1325c1d8..8ad0acb2 100644 --- a/Source/TransformFunctions/arm_rfft_init_q31.c +++ b/Source/TransformFunctions/arm_rfft_init_q31.c @@ -104,7 +104,7 @@ arm_status arm_rfft_init_q31( S->twidCoefRModifier = 1U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q31(&(S->cfftInst),4096); if (status != ARM_MATH_SUCCESS) { @@ -119,7 +119,7 @@ arm_status arm_rfft_init_q31( case 4096U: S->twidCoefRModifier = 2U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q31(&(S->cfftInst),2048); if (status != ARM_MATH_SUCCESS) { @@ -134,7 +134,7 @@ arm_status arm_rfft_init_q31( case 2048U: S->twidCoefRModifier = 4U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q31(&(S->cfftInst),1024); if (status != ARM_MATH_SUCCESS) { @@ -148,7 +148,7 @@ arm_status arm_rfft_init_q31( #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FXT_512)) case 1024U: S->twidCoefRModifier = 8U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q31(&(S->cfftInst),512); if (status != ARM_MATH_SUCCESS) { @@ 
-162,7 +162,7 @@ arm_status arm_rfft_init_q31( #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FXT_256)) case 512U: S->twidCoefRModifier = 16U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q31(&(S->cfftInst),256); if (status != ARM_MATH_SUCCESS) { @@ -176,7 +176,7 @@ arm_status arm_rfft_init_q31( #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FXT_128)) case 256U: S->twidCoefRModifier = 32U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q31(&(S->cfftInst),128); if (status != ARM_MATH_SUCCESS) { @@ -190,7 +190,7 @@ arm_status arm_rfft_init_q31( #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FXT_64)) case 128U: S->twidCoefRModifier = 64U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q31(&(S->cfftInst),64); if (status != ARM_MATH_SUCCESS) { @@ -204,7 +204,7 @@ arm_status arm_rfft_init_q31( #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FXT_32)) case 64U: S->twidCoefRModifier = 128U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) status=arm_cfft_init_q31(&(S->cfftInst),32); if (status != ARM_MATH_SUCCESS) { @@ -218,7 +218,7 @@ arm_status arm_rfft_init_q31( #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FXT_16)) case 32U: S->twidCoefRModifier = 256U; - #if defined(ARM_MATH_MVEI) + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) 
status=arm_cfft_init_q31(&(S->cfftInst),16); if (status != ARM_MATH_SUCCESS) { diff --git a/Source/TransformFunctions/arm_rfft_q15.c b/Source/TransformFunctions/arm_rfft_q15.c index 43233304..6d68bf5e 100644 --- a/Source/TransformFunctions/arm_rfft_q15.c +++ b/Source/TransformFunctions/arm_rfft_q15.c @@ -84,7 +84,7 @@ void arm_rfft_q15( q15_t * pSrc, q15_t * pDst) { -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) const arm_cfft_instance_q15 *S_CFFT = &(S->cfftInst); #else const arm_cfft_instance_q15 *S_CFFT = S->pCfft; @@ -137,7 +137,7 @@ void arm_rfft_q15( The function implements a Real FFT */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_split_rfft_q15( q15_t * pSrc, uint32_t fftLen, @@ -402,7 +402,7 @@ void arm_split_rfft_q15( The function implements a Real IFFT */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_split_rifft_q15( q15_t * pSrc, diff --git a/Source/TransformFunctions/arm_rfft_q31.c b/Source/TransformFunctions/arm_rfft_q31.c index 274428e4..190ac91a 100644 --- a/Source/TransformFunctions/arm_rfft_q31.c +++ b/Source/TransformFunctions/arm_rfft_q31.c @@ -85,7 +85,7 @@ void arm_rfft_q31( q31_t * pSrc, q31_t * pDst) { -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) const arm_cfft_instance_q31 *S_CFFT = &(S->cfftInst); #else const arm_cfft_instance_q31 *S_CFFT = S->pCfft; @@ -135,7 +135,7 @@ void arm_rfft_q31( @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_split_rfft_q31( q31_t *pSrc, @@ -338,7 +338,7 @@ void arm_split_rfft_q31( @return none */ -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_split_rifft_q31( q31_t * pSrc, diff --git a/Testing/CMakeLists.txt b/Testing/CMakeLists.txt index f30940b0..8db50d16 100644 --- a/Testing/CMakeLists.txt 
+++ b/Testing/CMakeLists.txt @@ -115,6 +115,10 @@ if (DISABLEFLOAT16) target_compile_definitions(FrameworkLib PRIVATE DISABLEFLOAT16) endif() +if (AUTOVECTORIZE) + target_compile_definitions(TestingLib PRIVATE ARM_MATH_AUTOVECTORIZE) +endif() + if (BENCHMARK) diff --git a/Testing/Source/Tests/FIRQ15.cpp b/Testing/Source/Tests/FIRQ15.cpp index d51e8274..bf786ad7 100644 --- a/Testing/Source/Tests/FIRQ15.cpp +++ b/Testing/Source/Tests/FIRQ15.cpp @@ -6,7 +6,7 @@ #define ABS_ERROR_Q15 ((q15_t)2) -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) static __ALIGNED(8) q15_t coeffArray[32]; #endif @@ -31,7 +31,7 @@ void checkInnerTail(q15_t *b) q15_t *outp = output.ptr(); unsigned long i; -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) int j; #endif int blockSize; @@ -50,7 +50,7 @@ void checkInnerTail(q15_t *b) blockSize = configp[0]; numTaps = configp[1]; -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) /* Copy coefficients and pad to zero */ memset(coeffArray,0,32); diff --git a/Testing/Source/Tests/FIRQ31.cpp b/Testing/Source/Tests/FIRQ31.cpp index 80ea681d..979709da 100644 --- a/Testing/Source/Tests/FIRQ31.cpp +++ b/Testing/Source/Tests/FIRQ31.cpp @@ -6,7 +6,7 @@ #define ABS_ERROR_Q31 ((q31_t)2) -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) static __ALIGNED(8) q31_t coeffArray[32]; #endif @@ -32,7 +32,7 @@ void checkInnerTail(q31_t *b) q31_t *outp = output.ptr(); unsigned long i; -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) int j; #endif int blockSize; @@ -51,7 +51,7 @@ void checkInnerTail(q31_t *b) blockSize = configp[0]; numTaps = configp[1]; -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) /* Copy coefficients and pad to zero */ memset(coeffArray,0,32); diff --git a/Testing/Source/Tests/FIRQ7.cpp 
b/Testing/Source/Tests/FIRQ7.cpp index 64ffda46..8516457b 100644 --- a/Testing/Source/Tests/FIRQ7.cpp +++ b/Testing/Source/Tests/FIRQ7.cpp @@ -6,7 +6,7 @@ #define ABS_ERROR_Q7 ((q7_t)2) -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) static __ALIGNED(8) q7_t coeffArray[32]; #endif @@ -31,7 +31,7 @@ void checkInnerTail(q7_t *b) q7_t *outp = output.ptr(); unsigned long i; -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) int j; #endif int blockSize; @@ -50,7 +50,7 @@ void checkInnerTail(q7_t *b) blockSize = configp[0]; numTaps = configp[1]; -#if defined(ARM_MATH_MVEI) +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) /* Copy coefficients and pad to zero */ memset(coeffArray,0,32); diff --git a/Testing/TestScripts/Regression/Commands.py b/Testing/TestScripts/Regression/Commands.py index 81af2d1f..2bf64b00 100755 --- a/Testing/TestScripts/Regression/Commands.py +++ b/Testing/TestScripts/Regression/Commands.py @@ -11,7 +11,7 @@ from pathlib import Path import sys DEBUGMODE = False -KEEPBUILDFOLDER = False +KEEPBUILDFOLDER = True DEBUGLIST=[ "-DBASICMATH=ON", @@ -41,9 +41,9 @@ def isDebugMode(): global DEBUGMODE return(DEBUGMODE) -def setKeepBuildFolder(): +def setNokeepBuildFolder(): global KEEPBUILDFOLDER - KEEPBUILDFOLDER=True + KEEPBUILDFOLDER=False def isKeepMode(): global KEEPBUILDFOLDER diff --git a/Testing/TestScripts/doc/Format.py b/Testing/TestScripts/doc/Format.py index 9c76a935..496db684 100755 --- a/Testing/TestScripts/doc/Format.py +++ b/Testing/TestScripts/doc/Format.py @@ -20,7 +20,7 @@ def joinit(iterable, delimiter): # Since some cores may be missing, each atble display # is computing a rstricted ordered core list with only the available cores. 
CORTEXCATEGORIES=["Cortex-M","Cortex-R","Cortex-A"] -CORECATEGORIES={"Cortex-M":["m0","m4", "m7", "m33" , "m55 scalar", "m55 mve"], +CORECATEGORIES={"Cortex-M":["m0","m4", "m7", "m33" , "m55 scalar", "m55 mve","m55 autovec"], "Cortex-R":["r8","r52"], "Cortex-A":["a32"] } diff --git a/Testing/createDb.sql b/Testing/createDb.sql index 2ef823dc..a1569d8b 100755 --- a/Testing/createDb.sql +++ b/Testing/createDb.sql @@ -121,6 +121,7 @@ INSERT INTO CORE VALUES(21,"r8","R8"); INSERT INTO CORE VALUES(22,"r5","R5"); INSERT INTO CORE VALUES(23,"a32","A32"); INSERT INTO CORE VALUES(24,"r52","R52"); +INSERT INTO CORE VALUES(25,"m55 autovec","M55AUTOVEC"); .quit diff --git a/Testing/runAllTests.py b/Testing/runAllTests.py index 3e8e6818..65baa714 100755 --- a/Testing/runAllTests.py +++ b/Testing/runAllTests.py @@ -131,15 +131,15 @@ parser.add_argument('-regdb', nargs='?',type = str,help="Regression database") parser.add_argument('-sqlite', nargs='?',default="/usr/bin/sqlite3",type = str,help="sqlite executable") parser.add_argument('-debug', action='store_true', help="Debug mode") -parser.add_argument('-keep', action='store_true', help="Keep build folder") +parser.add_argument('-nokeep', action='store_true', help="Do not keep build folder") args = parser.parse_args() if args.debug: setDebugMode() -if args.keep: - setKeepBuildFolder() +if args.nokeep: + setNokeepBuildFolder() # Create missing database files # if the db arguments are specified