From 9a254bc9269bd2b2cee594c691d94d66b5e3656e Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Wed, 20 Jan 2021 13:26:22 +0100 Subject: [PATCH] CMSIS-DSP: Improvements for building with gcc on M55. Solve most of f16 issues. But there are still some remaining build issues with gcc10q4. 2 functions are reverting to scalar version when build with gcc on M55. (Since Helium versions of those functions are not building). --- Include/arm_helium_utils.h | 4 +-- Include/arm_math.h | 8 ++++++ Include/arm_math_types.h | 6 ---- PythonWrapper/setup.py | 15 ++++------ Source/BasicMathFunctions/arm_mult_f16.c | 3 +- .../arm_biquad_cascade_stereo_df2T_f16.c | 7 ++++- .../MatrixFunctions/arm_mat_cmplx_mult_f16.c | 11 +++++--- Source/MatrixFunctions/arm_mat_mult_f16.c | 26 ++++++++--------- Source/TransformFunctions/arm_cfft_init_q15.c | 18 ++++++------ Testing/PatternGeneration/Tools.py | 2 +- Toolchain/GCC.cmake | 1 + gcc.cmake | 28 +++++++++++-------- 12 files changed, 72 insertions(+), 57 deletions(-) diff --git a/Include/arm_helium_utils.h b/Include/arm_helium_utils.h index df497345..93645a3c 100755 --- a/Include/arm_helium_utils.h +++ b/Include/arm_helium_utils.h @@ -122,7 +122,7 @@ __STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16( * re0+re1 | im0+im1 | re0+re1 | im0+im1 * re2+re3 | im2+im3 | re2+re3 | im2+im3 */ - vecTmp = vaddq(vecTmp, vecIn); + vecTmp = vaddq_f16(vecTmp, vecIn); vecOut = vecTmp; /* * shift left, random tmp insertion in bottom @@ -133,7 +133,7 @@ __STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16( * DONTCARE | DONTCARE | re0+re1+re0+re1 |im0+im1+im0+im1 * re0+re1+re2+re3 | im0+im1+im2+im3 | re2+re3+re2+re3 |im2+im3+im2+im3 */ - vecOut = vaddq(vecOut, vecTmp); + vecOut = vaddq_f16(vecOut, vecTmp); /* * Cmplx sum is in 4rd & 5th f16 elt * return full vector diff --git a/Include/arm_math.h b/Include/arm_math.h index 404ee91c..98bc9f91 100644 --- a/Include/arm_math.h +++ b/Include/arm_math.h @@ -49,6 +49,14 @@ * The library has generally separate functions for operating on 8-bit integers, 16-bit integers, * 32-bit integer and 32-bit floating-point values. * + * The library is providing vectorized versions of most algorthms for Helium + * and of most f32 algorithms for Neon. + * + * When using a vectorized version, provide a little bit of padding after the end of + * a buffer (3 words) because the vectorized code may read a little bit after the end + * of a buffer. You don't have to modify your buffers but just ensure that the + * end of buffer + padding is not outside of a memory region. + * * \section using Using the Library * * The library installer contains prebuilt versions of the libraries in the Lib folder. diff --git a/Include/arm_math_types.h b/Include/arm_math_types.h index 7c87b36f..01e18a75 100755 --- a/Include/arm_math_types.h +++ b/Include/arm_math_types.h @@ -110,10 +110,7 @@ extern "C" #define ARM_MATH_MVEF #endif #if !defined(ARM_MATH_MVE_FLOAT16) - /* HW Float16 not yet well supported on gcc for M55 */ - #if !defined(__CMSIS_GCC_H) #define ARM_MATH_MVE_FLOAT16 - #endif #endif #endif @@ -130,10 +127,7 @@ extern "C" #endif #if !defined(ARM_MATH_MVE_FLOAT16) - /* HW Float16 not yet well supported on gcc for M55 */ - #if !defined(__CMSIS_GCC_H) #define ARM_MATH_MVE_FLOAT16 - #endif #endif #endif diff --git a/PythonWrapper/setup.py b/PythonWrapper/setup.py index defb900c..2d6b8ccf 100644 --- a/PythonWrapper/setup.py +++ b/PythonWrapper/setup.py @@ -11,11 +11,8 @@ includes = [os.path.join(ROOT,"Include"),os.path.join(ROOT,"PrivateInclude"),os. if sys.platform == 'win32': cflags = ["-DWIN",config.cflags,"-DUNALIGNED_SUPPORT_DISABLE"] - # Custom because a customized arm_math.h is required to build on windows - # since the visual compiler and the win platform are - # not supported by default in arm_math.h else: - cflags = ["-Wno-unused-variable","-Wno-implicit-function-declaration",config.cflags,"-D__GNUC_PYTHON__"] + cflags = ["-Wno-attributes","-Wno-unused-function","-Wno-unused-variable","-Wno-implicit-function-declaration",config.cflags,"-D__GNUC_PYTHON__"] transform = glob.glob(os.path.join(ROOT,"Source","TransformFunctions","*.c")) #transform.remove(os.path.join(ROOT,"Source","TransformFunctions","arm_dct4_init_q15.c")) @@ -69,18 +66,18 @@ allsrcs = support + fastmath + filtering + matrix + statistics + complexf + basi allsrcs = allsrcs + controller + transform + modulesrc + common+ interpolation def notf16(number): - if re.match(r'^.*_f16.c$',number): + if re.search(r'f16',number): return(False) - else: - return(True) + if re.search(r'F16',number): + return(False) + return(True) -# If there are too many files, the linker command in failing on Windows. +# If there are too many files, the linker command is failing on Windows. # So f16 functions are removed since they are not currently available in the wrapper. # A next version will have to structure this wrapper more cleanly so that the # build can work even with more functions srcs = list(filter(notf16, allsrcs)) - module1 = Extension(config.extensionName, sources = (srcs ) diff --git a/Source/BasicMathFunctions/arm_mult_f16.c b/Source/BasicMathFunctions/arm_mult_f16.c index e4df864d..cb521804 100755 --- a/Source/BasicMathFunctions/arm_mult_f16.c +++ b/Source/BasicMathFunctions/arm_mult_f16.c @@ -56,7 +56,8 @@ @return none */ -#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" diff --git a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c index 11f4e6ef..0c3da123 100755 --- a/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c +++ b/Source/FilteringFunctions/arm_biquad_cascade_stereo_df2T_f16.c @@ -46,7 +46,12 @@ @param[in] blockSize number of samples to process @return none */ -#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__CMSIS_GCC_H) +#pragma GCC warning "Scalar version of arm_biquad_cascade_stereo_df2T_f16 built. Helium version has build issues with gcc." +#endif + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H) void arm_biquad_cascade_stereo_df2T_f16( const arm_biquad_cascade_stereo_df2T_instance_f16 * S, const float16_t * pSrc, diff --git a/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c b/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c index 977cba7a..358dde36 100755 --- a/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c +++ b/Source/MatrixFunctions/arm_mat_cmplx_mult_f16.c @@ -50,7 +50,12 @@ - \ref ARM_MATH_SUCCESS : Operation successful - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed */ -#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__CMSIS_GCC_H) +#pragma GCC warning "Scalar version of arm_mat_cmplx_mult_f16 built. Helium version has build issues with gcc." +#endif + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H) #include "arm_helium_utils.h" @@ -382,7 +387,7 @@ arm_status arm_mat_cmplx_mult_f16( uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ - uint16_t col, i = 0U, row = numRowsA, colCnt; /* loop counters */ + uint16_t col, i = 0U, row = numRowsA; /* loop counters */ arm_status status; /* status of matrix multiplication */ uint16x8_t vecOffs, vecColBOffs; uint32_t blkCnt,rowCnt; /* loop counters */ @@ -466,7 +471,6 @@ if ((pSrcA->numCols != pSrcB->numRows) || /* * Matrix A columns number of MAC operations are to be performed */ - colCnt = numColsA; float16_t const *pSrcA0Vec, *pSrcA1Vec, *pSrcA2Vec, *pSrcA3Vec; float16_t const *pInA0 = pInA; @@ -612,7 +616,6 @@ if ((pSrcA->numCols != pSrcB->numRows) || /* * Matrix A columns number of MAC operations are to be performed */ - colCnt = numColsA; float16_t const *pSrcA0Vec; float16_t const *pInA0 = pInA; diff --git a/Source/MatrixFunctions/arm_mat_mult_f16.c b/Source/MatrixFunctions/arm_mat_mult_f16.c index e15a6b32..1530e791 100755 --- a/Source/MatrixFunctions/arm_mat_mult_f16.c +++ b/Source/MatrixFunctions/arm_mat_mult_f16.c @@ -87,7 +87,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_2x2_mve( /* * move to 2nd column of matrix A */ - vecOffsA = vaddq(vecOffsA, (uint16_t) 1); + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 1); /* * load {a01 a01 a11 a11 x x x x} */ @@ -95,7 +95,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_2x2_mve( /* * move to next B row */ - vecOffsB = vaddq(vecOffsB, (uint16_t) 2); + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) 2); /* * load {b10, b11, b10, b11, x x x x } */ @@ -157,7 +157,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_3x3_mve( /* * move to 2nd column of matrix A */ - vecOffsA = vaddq(vecOffsA, (uint16_t) 1); + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 1); /* * load {a01 a01 a01 a11 a11 a11 a21 a21} */ @@ -165,7 +165,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_3x3_mve( /* * move to next B row */ - vecOffsB = vaddq(vecOffsB, (uint16_t) 3); + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) 3); /* * load {b10, b11, b12, b10, b11, b12, b10, b11} */ @@ -179,7 +179,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_3x3_mve( /* * move to 3rd column of matrix A */ - vecOffsA = vaddq(vecOffsA, (uint16_t) 1); + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 1); /* * load {a02 a02 a02 a12 a12 a12 a22 a22} */ @@ -187,7 +187,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_3x3_mve( /* * move to next B row */ - vecOffsB = vaddq(vecOffsB, (uint16_t) 3); + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) 3); /* * load {b20, b21, b22, b20, b21, b22, b20, b21} */ @@ -253,7 +253,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_4x4_mve( /* * jump 2 x A rows (2nd half of matrix) */ - vecOffsA = vaddq(vecOffsA, (uint16_t) 8); + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 8); /* * load {a20 a20 a20 a20 a30 a30 a30 a30} */ @@ -274,7 +274,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_4x4_mve( /* * move to next B row */ - vecOffsB = vaddq(vecOffsB, (uint16_t) 4); + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) 4); /* * load {b10, b11, b12, b13, b10, b11, b12, b13} */ @@ -287,7 +287,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_4x4_mve( /* * jump 2 x A rows (2nd half of matrix) */ - vecOffsA = vaddq(vecOffsA, (uint16_t) 8); + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 8); /* * load {a21 a21 a21 a21 a31 a31 a31 a31} */ @@ -309,7 +309,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_4x4_mve( /* * move to next B row */ - vecOffsB = vaddq(vecOffsB, (uint16_t) 4); + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) 4); /* * load {b20, b21, b22, b23, b20, b21, b22, b23} */ @@ -322,7 +322,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_4x4_mve( /* * jump 2 x A rows */ - vecOffsA = vaddq(vecOffsA, (uint16_t) 8); + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 8); /* * load {a22 a22 a22 a22 a32 a32 a32 a32} @@ -345,7 +345,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_4x4_mve( /* * move to next B row */ - vecOffsB = vaddq(vecOffsB, (uint16_t) 4); + vecOffsB = vaddq_n_u16(vecOffsB, (uint16_t) 4); /* * load {b30, b31, b32, b33, b30, b31, b32, b33} */ @@ -358,7 +358,7 @@ __STATIC_FORCEINLINE arm_status arm_mat_mult_f16_4x4_mve( /* * jump 2 x A rows */ - vecOffsA = vaddq(vecOffsA, (uint16_t) 8); + vecOffsA = vaddq_n_u16(vecOffsA, (uint16_t) 8); /* * load {a23 a23 a23 a23 a33 a33 a33 a33} */ diff --git a/Source/TransformFunctions/arm_cfft_init_q15.c b/Source/TransformFunctions/arm_cfft_init_q15.c index 7dcce7fa..738cd430 100755 --- a/Source/TransformFunctions/arm_cfft_init_q15.c +++ b/Source/TransformFunctions/arm_cfft_init_q15.c @@ -275,7 +275,7 @@ arm_status arm_cfft_init_q15( /* Initializations of Instance structure depending on the FFT length */ switch (S->fftLen) { -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096)) +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096)) /* Initializations of structure parameters for 4096 point FFT */ case 4096U: /* Initialise the bit reversal table modifier */ @@ -283,7 +283,7 @@ arm_status arm_cfft_init_q15( break; #endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048)) +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048)) /* Initializations of structure parameters for 2048 point FFT */ case 2048U: /* Initialise the bit reversal table modifier */ @@ -292,7 +292,7 @@ arm_status arm_cfft_init_q15( break; #endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024)) +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024)) /* Initializations of structure parameters for 1024 point FFT */ case 1024U: /* Initialise the bit reversal table modifier */ @@ -301,7 +301,7 @@ arm_status arm_cfft_init_q15( break; #endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FXT_512)) +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512)) /* Initializations of structure parameters for 512 point FFT */ case 512U: /* Initialise the bit reversal table modifier */ @@ -309,31 +309,31 @@ arm_status arm_cfft_init_q15( break; #endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FXT_256)) +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256)) case 256U: FFTINIT(q15,256); break; #endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FXT_128)) +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128)) case 128U: FFTINIT(q15,128); break; #endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FXT_64)) +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64)) case 64U: FFTINIT(q15,64); break; #endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FXT_32)) +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32)) case 32U: FFTINIT(q15,32); break; #endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FXT_16)) +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16)) case 16U: /* Initializations of structure parameters for 16 point FFT */ FFTINIT(q15,16); diff --git a/Testing/PatternGeneration/Tools.py b/Testing/PatternGeneration/Tools.py index abda8d41..70e6b2a6 100755 --- a/Testing/PatternGeneration/Tools.py +++ b/Testing/PatternGeneration/Tools.py @@ -3,7 +3,7 @@ import struct import numpy as np def normalize(a): - return(a/max(np.abs(a))) + return(a/np.max(np.abs(a))) TAILONLY = 1 BODYONLY = 2 diff --git a/Toolchain/GCC.cmake b/Toolchain/GCC.cmake index 4e0dcf15..c0e051d4 100644 --- a/Toolchain/GCC.cmake +++ b/Toolchain/GCC.cmake @@ -43,6 +43,7 @@ function(compilerSpecificCompileOptions PROJECTNAME ROOT) # Need to add other gcc config for other cortex-m cores if (ARM_CPU STREQUAL "cortex-m55" ) + target_compile_options(${PROJECTNAME} PUBLIC "-march=armv8.1-m.main+mve.fp+fp.dp") target_compile_options(${PROJECTNAME} PUBLIC "-mfpu=fpv5-d16") target_link_options(${PROJECTNAME} PUBLIC "-mfpu=fpv5-d16") endif() diff --git a/gcc.cmake b/gcc.cmake index 3f7b6e66..9cb2f42b 100644 --- a/gcc.cmake +++ b/gcc.cmake @@ -6,26 +6,25 @@ SET(CMAKE_SYSTEM_PROCESSOR arm) -#SET(tools "C:/PROGRA~2/GNUTOO~1/82018-~1") -#SET(CMAKE_C_COMPILER "${tools}/bin/arm-none-eabi-gcc.exe") -#SET(CMAKE_CXX_COMPILER "${tools}/bin/arm-none-eabi-g++.exe") -#SET(CMAKE_ASM_COMPILER "${tools}/bin/arm-none-eabi-gcc.exe") +#SET(CMAKE_C_COMPILER "${tools}/bin/arm-none-eabi-gcc") +#SET(CMAKE_CXX_COMPILER "${tools}/bin/arm-none-eabi-g++") +#SET(CMAKE_ASM_COMPILER "${tools}/bin/arm-none-eabi-gcc") find_program(CMAKE_C_COMPILER NAMES arm-none-eabi-gcc arm-none-eabi-gcc.exe) find_program(CMAKE_CXX_COMPILER NAMES arm-none-eabi-g++ arm-none-eabi-g++.exe) find_program(CMAKE_ASM_COMPILER NAMES arm-none-eabi-gcc arm-none-eabi-gcc.exe) -#SET(CMAKE_AR "${tools}/bin/arm-none-eabi-gcc-ar.exe") -find_program(CMAKE_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe) -find_program(CMAKE_CXX_COMPILER_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe) -find_program(CMAKE_C_COMPILER_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe) +SET(CMAKE_AR "${tools}/bin/ar") +SET(CMAKE_CXX_COMPILER_AR "${tools}/bin/ar") +SET(CMAKE_C_COMPILER_AR "${tools}/bin/ar") +#find_program(CMAKE_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe ) +#find_program(CMAKE_CXX_COMPILER_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe ) +#find_program(CMAKE_C_COMPILER_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe) -#SET(CMAKE_CXX_COMPILER_AR "${tools}/bin/arm-none-eabi-gcc-ar.exe") -#SET(CMAKE_C_COMPILER_AR "${tools}/bin/arm-none-eabi-gcc-ar.exe") -#SET(CMAKE_LINKER "${tools}/bin/arm-none-eabi-g++.exe") +#SET(CMAKE_LINKER "${tools}/bin/arm-none-eabi-g++") find_program(CMAKE_LINKER NAMES arm-none-eabi-g++ arm-none-eabi-g++.exe) SET(CMAKE_C_LINK_EXECUTABLE " -o ") @@ -48,10 +47,17 @@ if(NOT ARM_CPU) ) endif(NOT ARM_CPU) +if (ARM_CPU STREQUAL "cortex-m55") +SET(CMAKE_C_FLAGS "-ffunction-sections -fdata-sections -march=armv8.1-m.main+mve.fp+fp.dp" CACHE INTERNAL "C compiler common flags") +SET(CMAKE_CXX_FLAGS "-ffunction-sections -fdata-sections -march=armv8.1-m.main+mve.fp+fp.dp" CACHE INTERNAL "C compiler common flags") +SET(CMAKE_ASM_FLAGS "-march=armv8.1-m.main+mve.fp+fp.dp" CACHE INTERNAL "ASM compiler common flags") +SET(CMAKE_EXE_LINKER_FLAGS "-fno-use-linker-plugin -march=armv8.1-m.main+mve.fp+fp.dp" CACHE INTERNAL "linker flags") +else() SET(CMAKE_C_FLAGS "-ffunction-sections -fdata-sections -mcpu=${ARM_CPU}" CACHE INTERNAL "C compiler common flags") SET(CMAKE_CXX_FLAGS "-ffunction-sections -fdata-sections -mcpu=${ARM_CPU}" CACHE INTERNAL "C compiler common flags") SET(CMAKE_ASM_FLAGS "-mcpu=${ARM_CPU}" CACHE INTERNAL "ASM compiler common flags") SET(CMAKE_EXE_LINKER_FLAGS "-mcpu=${ARM_CPU}" CACHE INTERNAL "linker flags") +endif() get_property(IS_IN_TRY_COMPILE GLOBAL PROPERTY IN_TRY_COMPILE) if(IS_IN_TRY_COMPILE)