diff --git a/Include/arm_math.h b/Include/arm_math.h index afa8619a..91575d5b 100644 --- a/Include/arm_math.h +++ b/Include/arm_math.h @@ -143,7 +143,13 @@ * * - ARM_MATH_HELIUM: * - * It implies the flags ARM_MATH_MVEF and ARM_MATH_MVEI and ARM_MATH_FLOAT16. + * It implies the flags ARM_MATH_MVEF and ARM_MATH_MVEI and ARM_MATH_MVE_FLOAT16. + * + * - ARM_MATH_HELIUM_EXPERIMENTAL: + * + * Only taken into account when ARM_MATH_MVEF, ARM_MATH_MVEI or ARM_MATH_MVE_FLOAT16 are defined. + * Enable some vector versions which may have worse performance than scalar + * depending on the core / compiler configuration. * * - ARM_MATH_MVEF: * diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt index f20d394b..285801dc 100755 --- a/Source/CMakeLists.txt +++ b/Source/CMakeLists.txt @@ -14,6 +14,7 @@ include(configLib) option(NEON "Neon acceleration" OFF) option(NEONEXPERIMENTAL "Neon experimental acceleration" OFF) +option(HELIUMEXPERIMENTAL "Helium experimental acceleration" OFF) option(LOOPUNROLL "Loop unrolling" ON) option(ROUNDING "Rounding" OFF) option(MATRIXCHECK "Matrix Checks" OFF) diff --git a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c index 4c6e5bde..05ebd98a 100755 --- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f16.c @@ -47,7 +47,7 @@ @return none */ -#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) +#if (defined(ARM_MATH_MVE_FLOAT16) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) void arm_biquad_cascade_df2T_f16( const arm_biquad_cascade_df2T_instance_f16 * S, const float16_t * pSrc, diff --git a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c index a0d29951..61c57aeb 100644 --- a/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_df2T_f32.c @@ -45,7 +45,7 @@ @param[in] blockSize number of samples to process @return none */ -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) +#if (defined(ARM_MATH_MVEF) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" void arm_biquad_cascade_df2T_f32( diff --git a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c index 0c3da123..2a4c5047 100755 --- a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c @@ -51,7 +51,7 @@ #pragma GCC warning "Scalar version of arm_biquad_cascade_stereo_df2T_f16 built. Helium version has build issues with gcc." #endif -#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H) +#if (defined(ARM_MATH_MVE_FLOAT16) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H) void arm_biquad_cascade_stereo_df2T_f16( const arm_biquad_cascade_stereo_df2T_instance_f16 * S, const float16_t * pSrc, diff --git a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f32.c b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f32.c index 2dd5f7d5..1c7373fb 100644 --- a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f32.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f32.c @@ -45,7 +45,7 @@ @param[in] blockSize number of samples to process @return none */ -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) +#if (defined(ARM_MATH_MVEF) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" void arm_biquad_cascade_stereo_df2T_f32( diff --git a/cmsisdspconfig.py b/cmsisdspconfig.py index 71c21c23..3d3512da 100755 --- a/cmsisdspconfig.py +++ b/cmsisdspconfig.py @@ -27,6 +27,7 @@ config["MVEI"]=False config["MVEF"]=False config["NEON"]=False config["HELIUM"]=False +config["HELIUMEXPERIMENTAL"]=False config["Float16"]=True config["HOST"]=False @@ -365,6 +366,9 @@ def interpretCmakeOptions(cmake): if test(cmake,"MVEF"): r.append("-DARM_MATH_MVEF") + if test(cmake,"HELIUMEXPERIMENTAL"): + r.append("-DARM_MATH_HELIUM_EXPERIMENTAL") + if test(cmake,"HELIUM") or test(cmake,"MVEF") or test(cmake,"MVEI"): r.append("-IPrivateInclude") @@ -508,6 +512,7 @@ if not forHost: check(config,"ROUNDING") check(config,"MATRIXCHECK") + st.sidebar.header('Vector extensions') st.sidebar.info("Enable vector code. It is not automatic for Neon. Use of Helium will enable new options to select some interpolation tables.") archi=st.sidebar.selectbox("Vector",('None','Helium','Neon')) if archi == 'Neon': @@ -515,6 +520,8 @@ if not forHost: if archi == 'Helium': multiselect(config,"MVE configuration",["MVEI","MVEF"]) HELIUM=True + st.sidebar.info("When checked some experimental versions will be enabled and may be less performant than scalar version depending on the architecture.") + check(config,"HELIUMEXPERIMENTAL") if archi != 'None': st.sidebar.info("When autovectorization is on, pure C code will be compiled. The version with C intrinsics won't be compiled.") check(config,"AUTOVECTORIZE") diff --git a/configCore.cmake b/configCore.cmake index 28bdc811..4b4ff886 100644 --- a/configCore.cmake +++ b/configCore.cmake @@ -280,6 +280,10 @@ function(configcore PROJECTNAME ROOT) target_compile_definitions(${PROJECTNAME} PRIVATE ARM_MATH_NEON_EXPERIMENTAL) endif() + if (HELIUMEXPERIMENTAL) + target_compile_definitions(${PROJECTNAME} PRIVATE ARM_MATH_HELIUM_EXPERIMENTAL) + endif() + if (HELIUM AND CORTEXM) target_compile_definitions(${PROJECTNAME} PRIVATE ARM_MATH_HELIUM) endif()