diff --git a/Source/MatrixFunctions/arm_mat_scale_f16.c b/Source/MatrixFunctions/arm_mat_scale_f16.c
index 4a8de171..73b94be1 100755
--- a/Source/MatrixFunctions/arm_mat_scale_f16.c
+++ b/Source/MatrixFunctions/arm_mat_scale_f16.c
@@ -72,10 +72,12 @@ arm_status arm_mat_scale_f16(
     float16_t *pOut = pDst->pData;  /* output data matrix pointer */
     uint32_t  numSamples;           /* total number of elements in the matrix */
     uint32_t  blkCnt;               /* loop counters */
-    f16x8_t vecIn, vecOut;
+    f16x8_t vecIn, vecOut, vecScale;
     float16_t const *pInVec;
 
     pInVec = (float16_t const *) pIn;
+
+    vecScale = vdupq_n_f16(scale);
     /*
      * Total number of samples in the input matrix
      */
@@ -90,7 +92,7 @@ arm_status arm_mat_scale_f16(
         vecIn = vld1q(pInVec); 
         pInVec += 8;
 
-        vecOut = vmulq(vecIn, scale);
+        vecOut = vmulq_f16(vecIn, vecScale);
 
         vst1q(pOut, vecOut); 
         pOut += 8;
diff --git a/Source/SupportFunctions/arm_weighted_sum_f16.c b/Source/SupportFunctions/arm_weighted_sum_f16.c
index 5de3143f..2a9924b0 100755
--- a/Source/SupportFunctions/arm_weighted_sum_f16.c
+++ b/Source/SupportFunctions/arm_weighted_sum_f16.c
@@ -79,7 +79,7 @@ float16_t arm_weighted_sum_f16(const float16_t *in,const float16_t *weigths, uin
     accum1V = vdupq_n_f16(0.0f16);
     accum2V = vdupq_n_f16(0.0f16);
 
-    blkCnt = blockSize >> 2;
+    blkCnt = blockSize >> 3;
     while (blkCnt > 0) 
     {
         inV = vld1q(pIn);
@@ -96,7 +96,7 @@ float16_t arm_weighted_sum_f16(const float16_t *in,const float16_t *weigths, uin
     accum1 = vecAddAcrossF16Mve(accum1V);
     accum2 = vecAddAcrossF16Mve(accum2V);
 
-    blkCnt = blockSize & 3;
+    blkCnt = blockSize & 7;
     while(blkCnt > 0)
     {
         accum1 += (_Float16)*pIn++ * (_Float16)*pW;
diff --git a/Testing/Source/Tests/SupportTestsF16.cpp b/Testing/Source/Tests/SupportTestsF16.cpp
index 654c1b90..e644ab72 100755
--- a/Testing/Source/Tests/SupportTestsF16.cpp
+++ b/Testing/Source/Tests/SupportTestsF16.cpp
@@ -7,9 +7,12 @@
 #define SNR_THRESHOLD 120
 #define REL_ERROR (1.0e-5)
 
-#define REL_WEIGHTEDSUM_ERROR (2.0e-2)
+#define ABS_WEIGHTEDSUM_ERROR (5.0e-2) /* absolute tolerance given to ASSERT_CLOSE_ERROR in test_weighted_sum_f16 */
+#define REL_WEIGHTEDSUM_ERROR (1.0e-2) /* relative tolerance given to ASSERT_CLOSE_ERROR in test_weighted_sum_f16 */
 
+#define ABS_ERROR_F32 (1.0e-3) /* NOTE(review): no use of this macro is visible in this patch - confirm it is referenced */
 #define REL_ERROR_F32 (1.0e-3)
+
 #define ABS_Q15_ERROR ((q15_t)10)
 #define ABS_Q31_ERROR ((q31_t)80)
 #define ABS_Q7_ERROR ((q7_t)10)
@@ -26,8 +29,7 @@ void SupportTestsF16::test_weighted_sum_f16()
  
  *outp=arm_weighted_sum_f16(inp, coefsp,this->nbSamples);
  
- 
- ASSERT_REL_ERROR(*outp,refp[this->offset],REL_WEIGHTEDSUM_ERROR);
+ ASSERT_CLOSE_ERROR(*outp,refp[this->offset],ABS_WEIGHTEDSUM_ERROR,REL_WEIGHTEDSUM_ERROR);
  ASSERT_EMPTY_TAIL(output);
 
 } 
@@ -130,7 +132,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
   {    
 
     case TEST_WEIGHTED_SUM_F16_1:
-    this->nbSamples = 3;
+    this->nbSamples = 7;
     input.reload(SupportTestsF16::INPUTS_F16_ID,mgr,this->nbSamples);
     coefs.reload(SupportTestsF16::WEIGHTS_F16_ID,mgr,this->nbSamples);
     ref.reload(SupportTestsF16::REF_F16_ID,mgr);
@@ -141,7 +143,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
     break;
 
     case TEST_WEIGHTED_SUM_F16_2:
-    this->nbSamples = 8;
+    this->nbSamples = 16;
     input.reload(SupportTestsF16::INPUTS_F16_ID,mgr,this->nbSamples);
     coefs.reload(SupportTestsF16::WEIGHTS_F16_ID,mgr,this->nbSamples);
     ref.reload(SupportTestsF16::REF_F16_ID,mgr);
@@ -152,7 +154,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
     break;
 
     case TEST_WEIGHTED_SUM_F16_3:
-    this->nbSamples = 11;
+    this->nbSamples = 23;
     input.reload(SupportTestsF16::INPUTS_F16_ID,mgr,this->nbSamples);
     coefs.reload(SupportTestsF16::WEIGHTS_F16_ID,mgr,this->nbSamples);
     ref.reload(SupportTestsF16::REF_F16_ID,mgr);
@@ -280,7 +282,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
     break;
 
     case TEST_F32_F16_19:
-    this->nbSamples = 3;
+    this->nbSamples = 7;
     inputF32.reload(SupportTestsF16::SAMPLES_F32_ID,mgr,this->nbSamples);
     ref.reload(SupportTestsF16::SAMPLES_F16_ID,mgr,this->nbSamples);
     output.create(this->nbSamples,SupportTestsF16::OUT_F16_ID,mgr);
@@ -288,7 +290,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
     break;
 
     case TEST_F32_F16_20:
-    this->nbSamples = 8;
+    this->nbSamples = 16;
     inputF32.reload(SupportTestsF16::SAMPLES_F32_ID,mgr,this->nbSamples);
     ref.reload(SupportTestsF16::SAMPLES_F16_ID,mgr,this->nbSamples);
     output.create(this->nbSamples,SupportTestsF16::OUT_F16_ID,mgr);
@@ -296,7 +298,7 @@ void SupportTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>&
     break;
 
     case TEST_F32_F16_21:
-    this->nbSamples = 11;
+    this->nbSamples = 23;
     inputF32.reload(SupportTestsF16::SAMPLES_F32_ID,mgr,this->nbSamples);
     ref.reload(SupportTestsF16::SAMPLES_F16_ID,mgr,this->nbSamples);
     output.create(this->nbSamples,SupportTestsF16::OUT_F16_ID,mgr);