CMSIS-DSP: Improvement to f16 helium code for linear SVM.

Improved tests for weighted sum.
pull/19/head
Christophe Favergeon 5 years ago
parent 29a69f56d0
commit 4b4d5322c8

@@ -146,10 +146,12 @@ void arm_svm_linear_predict_f16(
/*
* Sum the partial parts
*/
sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc0);
sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc1);
sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc2);
sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc3);
acc0 = vmulq_n_f16(acc0,*pDualCoef++);
acc0 = vfmaq_n_f16(acc0,acc1,*pDualCoef++);
acc0 = vfmaq_n_f16(acc0,acc2,*pDualCoef++);
acc0 = vfmaq_n_f16(acc0,acc3,*pDualCoef++);
sum += vecAddAcrossF16Mve(acc0);
pSrcA += numCols * 4;
/*
@@ -216,8 +218,10 @@ void arm_svm_linear_predict_f16(
/*
* Sum the partial parts
*/
sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc0);
sum += (_Float16)*pDualCoef++ * vecAddAcrossF16Mve(acc1);
acc0 = vmulq_n_f16(acc0,*pDualCoef++);
acc0 = vfmaq_n_f16(acc0,acc1,*pDualCoef++);
sum += vecAddAcrossF16Mve(acc0);
pSrcA += numCols * 2;
row -= 2;

@@ -142,10 +142,13 @@ void arm_svm_linear_predict_f32(
/*
* Sum the partial parts
*/
sum += *pDualCoef++ * vecAddAcrossF32Mve(acc0);
sum += *pDualCoef++ * vecAddAcrossF32Mve(acc1);
sum += *pDualCoef++ * vecAddAcrossF32Mve(acc2);
sum += *pDualCoef++ * vecAddAcrossF32Mve(acc3);
acc0 = vmulq_n_f32(acc0,*pDualCoef++);
acc0 = vfmaq_n_f32(acc0,acc1,*pDualCoef++);
acc0 = vfmaq_n_f32(acc0,acc2,*pDualCoef++);
acc0 = vfmaq_n_f32(acc0,acc3,*pDualCoef++);
sum += vecAddAcrossF32Mve(acc0);
pSrcA += numCols * 4;
/*
@@ -212,8 +215,11 @@ void arm_svm_linear_predict_f32(
/*
* Sum the partial parts
*/
sum += *pDualCoef++ * vecAddAcrossF32Mve(acc0);
sum += *pDualCoef++ * vecAddAcrossF32Mve(acc1);
acc0 = vmulq_n_f32(acc0,*pDualCoef++);
acc0 = vfmaq_n_f32(acc0,acc1,*pDualCoef++);
sum += vecAddAcrossF32Mve(acc0);
pSrcA += numCols * 2;
row -= 2;

@@ -403,16 +403,18 @@ endif()
disableOptimization(TestingLib)
disableOptimization(FrameworkLib)
## Only build f16 version when running float16tests
if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL)))
target_sources(TestingLib PRIVATE ${TESTSRC16})
else()
target_sources(TestingLib PRIVATE ${TESTSRC})
endif()
if(NN)
target_sources(TestingLib PRIVATE ${NNSRC})
endif()
target_sources(TestingLib PRIVATE ${TESTSRC16})
target_sources(TestingLib PRIVATE testmain.cpp)
target_sources(TestingLib PRIVATE GeneratedSource/TestDesc.cpp)

@@ -8,7 +8,7 @@
#define REL_ERROR (1.0e-5)
#define ABS_WEIGHTEDSUM_ERROR (5.0e-2)
#define REL_WEIGHTEDSUM_ERROR (1.0e-2)
#define REL_WEIGHTEDSUM_ERROR (5.0e-2)
#define ABS_ERROR_F32 (1.0e-3)
#define REL_ERROR_F32 (1.0e-3)

Loading…
Cancel
Save