CMSIS-DSP: Improved some f16 Helium code.

pull/19/head
Christophe Favergeon 5 years ago
parent 8ba4243dfa
commit 29a69f56d0

@ -72,10 +72,12 @@ arm_status arm_mat_scale_f16(
float16_t *pOut = pDst->pData; /* output data matrix pointer */
uint32_t numSamples; /* total number of elements in the matrix */
uint32_t blkCnt; /* loop counters */
f16x8_t vecIn, vecOut;
f16x8_t vecIn, vecOut, vecScale;
float16_t const *pInVec;
pInVec = (float16_t const *) pIn;
vecScale = vdupq_n_f16(scale);
/*
* Total number of samples in the input matrix
*/
@ -90,7 +92,7 @@ arm_status arm_mat_scale_f16(
vecIn = vld1q(pInVec);
pInVec += 8;
vecOut = vmulq(vecIn, scale);
vecOut = vmulq_f16(vecIn, vecScale);
vst1q(pOut, vecOut);
pOut += 8;

@ -79,7 +79,7 @@ float16_t arm_weighted_sum_f16(const float16_t *in,const float16_t *weigths, uin
accum1V = vdupq_n_f16(0.0f16);
accum2V = vdupq_n_f16(0.0f16);
blkCnt = blockSize >> 2;
blkCnt = blockSize >> 3;
while (blkCnt > 0)
{
inV = vld1q(pIn);
@ -96,7 +96,7 @@ float16_t arm_weighted_sum_f16(const float16_t *in,const float16_t *weigths, uin
accum1 = vecAddAcrossF16Mve(accum1V);
accum2 = vecAddAcrossF16Mve(accum2V);
blkCnt = blockSize & 3;
blkCnt = blockSize & 7;
while(blkCnt > 0)
{
accum1 += (_Float16)*pIn++ * (_Float16)*pW;

@ -7,9 +7,12 @@
#define SNR_THRESHOLD 120
#define REL_ERROR (1.0e-5)
#define REL_WEIGHTEDSUM_ERROR (2.0e-2)
#define ABS_WEIGHTEDSUM_ERROR (5.0e-2)
#define REL_WEIGHTEDSUM_ERROR (1.0e-2)
#define ABS_ERROR_F32 (1.0e-3)
#define REL_ERROR_F32 (1.0e-3)
#define ABS_Q15_ERROR ((q15_t)10)
#define ABS_Q31_ERROR ((q31_t)80)
#define ABS_Q7_ERROR ((q7_t)10)
@ -26,8 +29,7 @@ void SupportTestsF16::test_weighted_sum_f16()
*outp=arm_weighted_sum_f16(inp, coefsp,this->nbSamples);
ASSERT_REL_ERROR(*outp,refp[this->offset],REL_WEIGHTEDSUM_ERROR);
ASSERT_CLOSE_ERROR(*outp,refp[this->offset],ABS_WEIGHTEDSUM_ERROR,REL_WEIGHTEDSUM_ERROR);
ASSERT_EMPTY_TAIL(output);
}
@ -130,7 +132,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
{
case TEST_WEIGHTED_SUM_F16_1:
this->nbSamples = 3;
this->nbSamples = 7;
input.reload(SupportTestsF16::INPUTS_F16_ID,mgr,this->nbSamples);
coefs.reload(SupportTestsF16::WEIGHTS_F16_ID,mgr,this->nbSamples);
ref.reload(SupportTestsF16::REF_F16_ID,mgr);
@ -141,7 +143,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
break;
case TEST_WEIGHTED_SUM_F16_2:
this->nbSamples = 8;
this->nbSamples = 16;
input.reload(SupportTestsF16::INPUTS_F16_ID,mgr,this->nbSamples);
coefs.reload(SupportTestsF16::WEIGHTS_F16_ID,mgr,this->nbSamples);
ref.reload(SupportTestsF16::REF_F16_ID,mgr);
@ -152,7 +154,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
break;
case TEST_WEIGHTED_SUM_F16_3:
this->nbSamples = 11;
this->nbSamples = 23;
input.reload(SupportTestsF16::INPUTS_F16_ID,mgr,this->nbSamples);
coefs.reload(SupportTestsF16::WEIGHTS_F16_ID,mgr,this->nbSamples);
ref.reload(SupportTestsF16::REF_F16_ID,mgr);
@ -280,7 +282,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
break;
case TEST_F32_F16_19:
this->nbSamples = 3;
this->nbSamples = 7;
inputF32.reload(SupportTestsF16::SAMPLES_F32_ID,mgr,this->nbSamples);
ref.reload(SupportTestsF16::SAMPLES_F16_ID,mgr,this->nbSamples);
output.create(this->nbSamples,SupportTestsF16::OUT_F16_ID,mgr);
@ -288,7 +290,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
break;
case TEST_F32_F16_20:
this->nbSamples = 8;
this->nbSamples = 16;
inputF32.reload(SupportTestsF16::SAMPLES_F32_ID,mgr,this->nbSamples);
ref.reload(SupportTestsF16::SAMPLES_F16_ID,mgr,this->nbSamples);
output.create(this->nbSamples,SupportTestsF16::OUT_F16_ID,mgr);
@ -296,7 +298,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
break;
case TEST_F32_F16_21:
this->nbSamples = 11;
this->nbSamples = 23;
inputF32.reload(SupportTestsF16::SAMPLES_F32_ID,mgr,this->nbSamples);
ref.reload(SupportTestsF16::SAMPLES_F16_ID,mgr,this->nbSamples);
output.create(this->nbSamples,SupportTestsF16::OUT_F16_ID,mgr);

Loading…
Cancel
Save