diff --git a/Source/SVMFunctions/arm_svm_linear_predict_f16.c b/Source/SVMFunctions/arm_svm_linear_predict_f16.c
index c4cbac24..79638527 100755
--- a/Source/SVMFunctions/arm_svm_linear_predict_f16.c
+++ b/Source/SVMFunctions/arm_svm_linear_predict_f16.c
@@ -146,10 +146,12 @@ void arm_svm_linear_predict_f16(
         /*
          * Sum the partial parts
          */
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc0);
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc1);
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc2);
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc3);
+        acc0 = vmulq_n_f16(acc0,*pDualCoef++);
+        acc0 = vfmaq_n_f16(acc0,acc1,*pDualCoef++);
+        acc0 = vfmaq_n_f16(acc0,acc2,*pDualCoef++);
+        acc0 = vfmaq_n_f16(acc0,acc3,*pDualCoef++);
+
+        sum += vecAddAcrossF16Mve(acc0);
 
         pSrcA += numCols * 4;
         /*
@@ -216,8 +218,10 @@
         /*
          * Sum the partial parts
          */
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc0);
-        sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc1);
+        acc0 = vmulq_n_f16(acc0,*pDualCoef++);
+        acc0 = vfmaq_n_f16(acc0,acc1,*pDualCoef++);
+
+        sum += vecAddAcrossF16Mve(acc0);
         pSrcA += numCols * 2;
 
         row -= 2;
diff --git a/Source/SVMFunctions/arm_svm_linear_predict_f32.c b/Source/SVMFunctions/arm_svm_linear_predict_f32.c
index c45426ab..215a2384 100755
--- a/Source/SVMFunctions/arm_svm_linear_predict_f32.c
+++ b/Source/SVMFunctions/arm_svm_linear_predict_f32.c
@@ -142,10 +142,13 @@ void arm_svm_linear_predict_f32(
         /*
          * Sum the partial parts
          */
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc0);
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc1);
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc2);
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc3);
+
+        acc0 = vmulq_n_f32(acc0,*pDualCoef++);
+        acc0 = vfmaq_n_f32(acc0,acc1,*pDualCoef++);
+        acc0 = vfmaq_n_f32(acc0,acc2,*pDualCoef++);
+        acc0 = vfmaq_n_f32(acc0,acc3,*pDualCoef++);
+
+        sum += vecAddAcrossF32Mve(acc0);
 
         pSrcA += numCols * 4;
         /*
@@ -212,8 +215,11 @@
         /*
          * Sum the partial parts
          */
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc0);
-        sum += *pDualCoef++ * vecAddAcrossF32Mve(acc1);
+        acc0 = vmulq_n_f32(acc0,*pDualCoef++);
+        acc0 = vfmaq_n_f32(acc0,acc1,*pDualCoef++);
+
+        sum += vecAddAcrossF32Mve(acc0);
+
         pSrcA += numCols * 2;
 
         row -= 2;
diff --git a/Testing/CMakeLists.txt b/Testing/CMakeLists.txt
index 7020095a..f30940b0 100644
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@@ -403,16 +403,18 @@ endif()
 disableOptimization(TestingLib)
 disableOptimization(FrameworkLib)
 
-
+## Only build f16 version when running float16 tests
+if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL)))
+target_sources(TestingLib PRIVATE ${TESTSRC16})
+else()
 target_sources(TestingLib PRIVATE ${TESTSRC})
-
+endif()
 
 if(NN)
 target_sources(TestingLib PRIVATE ${NNSRC})
 endif()
 
-target_sources(TestingLib PRIVATE ${TESTSRC16})
 
 
 target_sources(TestingLib PRIVATE testmain.cpp)
 target_sources(TestingLib PRIVATE GeneratedSource/TestDesc.cpp)
diff --git a/Testing/Source/Tests/SupportTestsF16.cpp b/Testing/Source/Tests/SupportTestsF16.cpp
index e644ab72..c05b4203 100755
--- a/Testing/Source/Tests/SupportTestsF16.cpp
+++ b/Testing/Source/Tests/SupportTestsF16.cpp
@@ -8,7 +8,7 @@
 #define REL_ERROR (1.0e-5)
 
 #define ABS_WEIGHTEDSUM_ERROR (5.0e-2)
-#define REL_WEIGHTEDSUM_ERROR (1.0e-2)
+#define REL_WEIGHTEDSUM_ERROR (5.0e-2)
 
 #define ABS_ERROR_F32 (1.0e-3)
 #define REL_ERROR_F32 (1.0e-3)
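
Note (not part of the patch): a minimal standalone sketch of the reduction refactor applied above. It is illustrative only; it uses AArch64 NEON intrinsics (vmulq_n_f32, vfmaq_n_f32, vaddvq_f32) rather than the CMSIS-DSP MVE helpers vecAddAcrossF16Mve/vecAddAcrossF32Mve used in the real code, and the names acc0..acc3 and dualCoef are hypothetical. The idea is the same: instead of performing one horizontal reduction per accumulator and scaling each result by its dual coefficient, the coefficients are folded into the accumulators with a multiply and three fused multiply-adds, leaving a single horizontal add at the end.

/* Illustrative sketch only (not part of the patch): the "scale the
 * accumulators, then reduce once" pattern, written with AArch64 NEON
 * intrinsics. acc0..acc3 and dualCoef are hypothetical names.
 */
#include <arm_neon.h>

static inline float32_t combine_and_reduce(float32x4_t acc0, float32x4_t acc1,
                                            float32x4_t acc2, float32x4_t acc3,
                                            const float32_t *dualCoef)
{
    /* Old form: d0*sum(acc0) + d1*sum(acc1) + d2*sum(acc2) + d3*sum(acc3),
     * i.e. four horizontal reductions. The new form folds the coefficients
     * in first, so only one horizontal reduction is needed.
     */
    acc0 = vmulq_n_f32(acc0, dualCoef[0]);        /* acc0  = d0 * acc0 */
    acc0 = vfmaq_n_f32(acc0, acc1, dualCoef[1]);  /* acc0 += d1 * acc1 */
    acc0 = vfmaq_n_f32(acc0, acc2, dualCoef[2]);  /* acc0 += d2 * acc2 */
    acc0 = vfmaq_n_f32(acc0, acc3, dualCoef[3]);  /* acc0 += d3 * acc3 */

    return vaddvq_f32(acc0);                      /* single horizontal add */
}

Because the per-lane products are now accumulated before the final reduction, results are not guaranteed to be bit-identical to the original evaluation order; the difference is ordinary floating-point rounding and is most visible at half precision.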