diff --git a/Source/MatrixFunctions/arm_mat_scale_f16.c b/Source/MatrixFunctions/arm_mat_scale_f16.c index 4a8de171..73b94be1 100755 --- a/Source/MatrixFunctions/arm_mat_scale_f16.c +++ b/Source/MatrixFunctions/arm_mat_scale_f16.c @@ -72,10 +72,12 @@ arm_status arm_mat_scale_f16( float16_t *pOut = pDst->pData; /* output data matrix pointer */ uint32_t numSamples; /* total number of elements in the matrix */ uint32_t blkCnt; /* loop counters */ - f16x8_t vecIn, vecOut; + f16x8_t vecIn, vecOut, vecScale; float16_t const *pInVec; pInVec = (float16_t const *) pIn; + + vecScale = vdupq_n_f16(scale); /* * Total number of samples in the input matrix */ @@ -90,7 +92,7 @@ arm_status arm_mat_scale_f16( vecIn = vld1q(pInVec); pInVec += 8; - vecOut = vmulq(vecIn, scale); + vecOut = vmulq_f16(vecIn, vecScale); vst1q(pOut, vecOut); pOut += 8; diff --git a/Source/SupportFunctions/arm_weighted_sum_f16.c b/Source/SupportFunctions/arm_weighted_sum_f16.c index 5de3143f..2a9924b0 100755 --- a/Source/SupportFunctions/arm_weighted_sum_f16.c +++ b/Source/SupportFunctions/arm_weighted_sum_f16.c @@ -79,7 +79,7 @@ float16_t arm_weighted_sum_f16(const float16_t *in,const float16_t *weigths, uin accum1V = vdupq_n_f16(0.0f16); accum2V = vdupq_n_f16(0.0f16); - blkCnt = blockSize >> 2; + blkCnt = blockSize >> 3; while (blkCnt > 0) { inV = vld1q(pIn); @@ -96,7 +96,7 @@ float16_t arm_weighted_sum_f16(const float16_t *in,const float16_t *weigths, uin accum1 = vecAddAcrossF16Mve(accum1V); accum2 = vecAddAcrossF16Mve(accum2V); - blkCnt = blockSize & 3; + blkCnt = blockSize & 7; while(blkCnt > 0) { accum1 += (_Float16)*pIn++ * (_Float16)*pW; diff --git a/Testing/Source/Tests/SupportTestsF16.cpp b/Testing/Source/Tests/SupportTestsF16.cpp index 654c1b90..e644ab72 100755 --- a/Testing/Source/Tests/SupportTestsF16.cpp +++ b/Testing/Source/Tests/SupportTestsF16.cpp @@ -7,9 +7,12 @@ #define SNR_THRESHOLD 120 #define REL_ERROR (1.0e-5) -#define REL_WEIGHTEDSUM_ERROR (2.0e-2) +#define ABS_WEIGHTEDSUM_ERROR (5.0e-2) +#define REL_WEIGHTEDSUM_ERROR (1.0e-2) +#define ABS_ERROR_F32 (1.0e-3) #define REL_ERROR_F32 (1.0e-3) + #define ABS_Q15_ERROR ((q15_t)10) #define ABS_Q31_ERROR ((q31_t)80) #define ABS_Q7_ERROR ((q7_t)10) @@ -26,8 +29,7 @@ void SupportTestsF16::test_weighted_sum_f16() *outp=arm_weighted_sum_f16(inp, coefsp,this->nbSamples); - - ASSERT_REL_ERROR(*outp,refp[this->offset],REL_WEIGHTEDSUM_ERROR); + ASSERT_CLOSE_ERROR(*outp,refp[this->offset],ABS_WEIGHTEDSUM_ERROR,REL_WEIGHTEDSUM_ERROR); ASSERT_EMPTY_TAIL(output); } @@ -130,7 +132,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector& { case TEST_WEIGHTED_SUM_F16_1: - this->nbSamples = 3; + this->nbSamples = 7; input.reload(SupportTestsF16::INPUTS_F16_ID,mgr,this->nbSamples); coefs.reload(SupportTestsF16::WEIGHTS_F16_ID,mgr,this->nbSamples); ref.reload(SupportTestsF16::REF_F16_ID,mgr); @@ -141,7 +143,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector& break; case TEST_WEIGHTED_SUM_F16_2: - this->nbSamples = 8; + this->nbSamples = 16; input.reload(SupportTestsF16::INPUTS_F16_ID,mgr,this->nbSamples); coefs.reload(SupportTestsF16::WEIGHTS_F16_ID,mgr,this->nbSamples); ref.reload(SupportTestsF16::REF_F16_ID,mgr); @@ -152,7 +154,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector& break; case TEST_WEIGHTED_SUM_F16_3: - this->nbSamples = 11; + this->nbSamples = 23; input.reload(SupportTestsF16::INPUTS_F16_ID,mgr,this->nbSamples); coefs.reload(SupportTestsF16::WEIGHTS_F16_ID,mgr,this->nbSamples); ref.reload(SupportTestsF16::REF_F16_ID,mgr); @@ -280,7 +282,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector& break; case TEST_F32_F16_19: - this->nbSamples = 3; + this->nbSamples = 7; inputF32.reload(SupportTestsF16::SAMPLES_F32_ID,mgr,this->nbSamples); ref.reload(SupportTestsF16::SAMPLES_F16_ID,mgr,this->nbSamples); output.create(this->nbSamples,SupportTestsF16::OUT_F16_ID,mgr); @@ -288,7 +290,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector& break; case TEST_F32_F16_20: - this->nbSamples = 8; + this->nbSamples = 16; inputF32.reload(SupportTestsF16::SAMPLES_F32_ID,mgr,this->nbSamples); ref.reload(SupportTestsF16::SAMPLES_F16_ID,mgr,this->nbSamples); output.create(this->nbSamples,SupportTestsF16::OUT_F16_ID,mgr); @@ -296,7 +298,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector& break; case TEST_F32_F16_21: - this->nbSamples = 11; + this->nbSamples = 23; inputF32.reload(SupportTestsF16::SAMPLES_F32_ID,mgr,this->nbSamples); ref.reload(SupportTestsF16::SAMPLES_F16_ID,mgr,this->nbSamples); output.create(this->nbSamples,SupportTestsF16::OUT_F16_ID,mgr);