CMSIS-DSP: Mean square error for q15, q31, f16, f32, f64.

Reworked q7 to have a bit more accuracy.
pull/19/head
Christophe Favergeon 4 years ago
parent 47a987217f
commit 8dcdb350a4

@ -910,6 +910,66 @@ void arm_mse_q7(
uint32_t blockSize,
q7_t * pResult);
/**
@brief Mean square error between two Q15 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
void arm_mse_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
uint32_t blockSize,
q15_t * pResult);
/**
@brief Mean square error between two Q31 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
void arm_mse_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
uint32_t blockSize,
q31_t * pResult);
/**
@brief Mean square error between two single precision float vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
void arm_mse_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t * pResult);
/**
@brief Mean square error between two double precision float vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
void arm_mse_f64(
const float64_t * pSrcA,
const float64_t * pSrcB,
uint32_t blockSize,
float64_t * pResult);
#ifdef __cplusplus
}
#endif

@ -243,6 +243,21 @@ float16_t arm_kullback_leibler_f16(const float16_t * pSrcA
uint32_t blockSize,
float16_t *pResult);
/**
@brief Mean square error between two half precision float vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
void arm_mse_f16(
const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t blockSize,
float16_t * pResult);
#endif /*defined(ARM_FLOAT16_SUPPORTED)*/
#ifdef __cplusplus
}

@ -81,6 +81,11 @@ target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_f64.c)
configLib(CMSISDSPStatistics ${ROOT})
configDsp(CMSISDSPStatistics ${ROOT})

@ -94,3 +94,7 @@
#include "arm_absmin_no_idx_q31.c"
#include "arm_absmin_no_idx_q7.c"
#include "arm_mse_q7.c"
#include "arm_mse_q15.c"
#include "arm_mse_q31.c"
#include "arm_mse_f32.c"
#include "arm_mse_f64.c"

@ -43,3 +43,4 @@
#include "arm_absmin_f16.c"
#include "arm_absmax_no_idx_f16.c"
#include "arm_absmin_no_idx_f16.c"
#include "arm_mse_f16.c"

@ -0,0 +1,203 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_f16.c
* Description: Half floating point mean square error
*
* $Date: 05 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two half floating point vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] result mean square error
@return none
*/
#if !defined(ARM_MATH_AUTOVECTORIZE)
#if defined(ARM_MATH_MVE_FLOAT16)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) half-precision variant.
 *
 * Accumulates (A[i] - B[i])^2 in an 8-lane vector accumulator, reduces the
 * accumulator to a scalar and divides by blockSize.
 *
 * pSrcA, pSrcB : input vectors, blockSize samples each
 * blockSize    : number of samples
 * result       : receives the mean square error
 */
void arm_mse_f16(
    const float16_t * pSrcA,
    const float16_t * pSrcB,
          uint32_t    blockSize,
          float16_t * result)
{
    float16x8_t vecA, vecB;
    float16x8_t vecSum;
    uint32_t blkCnt;
    _Float16 sum;

    vecSum = vdupq_n_f16(0.0f16);

    /* Main loop: 8 samples per iteration */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        vecA = vld1q(pSrcA);
        pSrcA += 8;

        vecB = vld1q(pSrcB);
        pSrcB += 8;

        vecA = vsubq(vecA, vecB);
        vecSum = vfmaq(vecSum, vecA, vecA);

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: remaining (blockSize & 7) samples */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /*
         * Predicated (zeroing) loads: only the blkCnt active lanes are read,
         * so the tail never touches memory past the end of the inputs.
         * Inactive lanes are zero and are ignored by the predicated
         * accumulate below.
         */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);

        vecA = vsubq(vecA, vecB);
        vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
    }

    /* Reduce the lane accumulators to one scalar (helper from arm_helium_utils.h) */
    sum = vecAddAcrossF16Mve(vecSum);

    /* Store result in destination buffer */
    *result = (_Float16)sum / (_Float16)blockSize;
}
#endif
#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
#if defined(ARM_FLOAT16_SUPPORTED)
#if (!defined(ARM_MATH_MVE_FLOAT16)) || defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Scalar half-precision variant.
 *
 * Sums (A[i] - B[i])^2 over the block in sample order and divides the sum
 * by blockSize. With ARM_MATH_LOOPUNROLL the bulk of the block is handled
 * eight samples per iteration; the trailing loop handles what is left.
 *
 * pSrcA, pSrcB : input vectors, blockSize samples each
 * blockSize    : number of samples
 * result       : receives the mean square error
 */
void arm_mse_f16(
    const float16_t * pSrcA,
    const float16_t * pSrcB,
          uint32_t    blockSize,
          float16_t * result)
{
    uint32_t remaining;          /* Samples left for the trailing loop */
    _Float16 diff;               /* Difference of the current sample pair */
    _Float16 acc = 0.0f16;       /* Running sum of squared differences */

#if defined (ARM_MATH_LOOPUNROLL)
    uint32_t groups;             /* Number of complete 8-sample groups */

    for (groups = blockSize >> 3; groups > 0U; groups--)
    {
        diff = (_Float16)pSrcA[0] - (_Float16)pSrcB[0];
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)pSrcA[1] - (_Float16)pSrcB[1];
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)pSrcA[2] - (_Float16)pSrcB[2];
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)pSrcA[3] - (_Float16)pSrcB[3];
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)pSrcA[4] - (_Float16)pSrcB[4];
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)pSrcA[5] - (_Float16)pSrcB[5];
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)pSrcA[6] - (_Float16)pSrcB[6];
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)pSrcA[7] - (_Float16)pSrcB[7];
        acc += (_Float16)diff * (_Float16)diff;

        pSrcA += 8;
        pSrcB += 8;
    }

    /* Samples that did not fill a complete group */
    remaining = blockSize & 7;
#else
    /* No unrolling: every sample goes through the trailing loop */
    remaining = blockSize;
#endif

    for (; remaining > 0U; remaining--)
    {
        diff = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
        acc += (_Float16)diff * (_Float16)diff;
    }

    /* Mean of the squared differences */
    *result = (_Float16)acc / (_Float16)blockSize;
}
#endif /* end of test for vector instruction availability */
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
/**
@} end of MSE group
*/

@ -0,0 +1,246 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_f32.c
* Description: Floating point mean square error
*
* $Date: 05 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two floating point vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] result mean square error
@return none
*/
#if !defined(ARM_MATH_AUTOVECTORIZE)
#if defined(ARM_MATH_MVEF)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) single-precision variant.
 *
 * Accumulates (A[i] - B[i])^2 in a 4-lane vector accumulator, reduces the
 * accumulator to a scalar and divides by blockSize.
 *
 * pSrcA, pSrcB : input vectors, blockSize samples each
 * blockSize    : number of samples
 * result       : receives the mean square error
 */
void arm_mse_f32(
    const float32_t * pSrcA,
    const float32_t * pSrcB,
          uint32_t    blockSize,
          float32_t * result)
{
    float32x4_t vecA, vecB;
    float32x4_t vecSum;
    uint32_t blkCnt;
    float32_t sum;

    vecSum = vdupq_n_f32(0.0f);

    /* Main loop: 4 samples per iteration */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0U)
    {
        vecA = vld1q(pSrcA);
        pSrcA += 4;

        vecB = vld1q(pSrcB);
        pSrcB += 4;

        vecA = vsubq(vecA, vecB);
        vecSum = vfmaq(vecSum, vecA, vecA);

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: remaining (blockSize & 3) samples */
    blkCnt = blockSize & 3;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /*
         * Predicated (zeroing) loads: only the blkCnt active lanes are read,
         * so the tail never touches memory past the end of the inputs.
         * Inactive lanes are zero and are ignored by the predicated
         * accumulate below.
         */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);

        vecA = vsubq(vecA, vecB);
        vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
    }

    /* Reduce the lane accumulators to one scalar (helper from arm_helium_utils.h) */
    sum = vecAddAcrossF32Mve(vecSum);

    /* Store result in destination buffer */
    *result = sum / blockSize;
}
#endif
#if defined(ARM_MATH_NEON)
/*
 * Neon single-precision variant.
 *
 * The bulk of the block is processed four samples at a time with a vector
 * accumulator; the accumulator is then reduced to a scalar and the last
 * (blockSize & 3) samples are added with scalar arithmetic.
 *
 * pSrcA, pSrcB : input vectors, blockSize samples each
 * blockSize    : number of samples
 * result       : receives the mean square error
 */
void arm_mse_f32(
    const float32_t * pSrcA,
    const float32_t * pSrcB,
          uint32_t    blockSize,
          float32_t * result)
{
    float32x4_t vecA, vecB;
    float32x4_t vecSum;
    uint32_t blkCnt;
    float32_t inA, inB;          /* Scalar temporaries for the tail loop */
    float32_t sum = 0.0f;

    vecSum = vdupq_n_f32(0.0f);

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0U)
    {
        vecA = vld1q_f32(pSrcA);
        pSrcA += 4;

        vecB = vld1q_f32(pSrcB);
        pSrcB += 4;

        vecA = vsubq_f32(vecA, vecB);
        vecSum = vfmaq_f32(vecSum, vecA, vecA);

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Horizontal add of the four lane accumulators */
#if defined(__aarch64__)
    sum = vpadds_f32(vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum)));
#else
    {
        f32x2_t tmp = vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum));

        sum = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1);
    }
#endif

    /* Tail: remaining (blockSize & 3) samples, scalar */
    blkCnt = blockSize & 3;
    while (blkCnt > 0U)
    {
        /* Accumulate the squared difference of the current sample pair */
        inA = *pSrcA++;
        inB = *pSrcB++;
        inA = inA - inB;
        sum += inA * inA;

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Store result in destination buffer */
    *result = sum / blockSize;
}
#endif
#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
#if (!defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON)) || defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Scalar single-precision variant.
 *
 * Sums (A[i] - B[i])^2 over the block in sample order and divides the sum
 * by blockSize. With ARM_MATH_LOOPUNROLL the bulk of the block is handled
 * four samples per iteration; the trailing loop handles the 0 to 3 samples
 * that are left.
 *
 * pSrcA, pSrcB : input vectors, blockSize samples each
 * blockSize    : number of samples
 * result       : receives the mean square error
 */
void arm_mse_f32(
    const float32_t * pSrcA,
    const float32_t * pSrcB,
          uint32_t    blockSize,
          float32_t * result)
{
    uint32_t remaining;          /* Samples left for the trailing loop */
    float32_t diff;              /* Difference of the current sample pair */
    float32_t acc = 0.0f;        /* Running sum of squared differences */

#if defined (ARM_MATH_LOOPUNROLL)
    uint32_t groups;             /* Number of complete 4-sample groups */

    for (groups = blockSize >> 2; groups > 0U; groups--)
    {
        diff = pSrcA[0] - pSrcB[0];
        acc += diff * diff;

        diff = pSrcA[1] - pSrcB[1];
        acc += diff * diff;

        diff = pSrcA[2] - pSrcB[2];
        acc += diff * diff;

        diff = pSrcA[3] - pSrcB[3];
        acc += diff * diff;

        pSrcA += 4;
        pSrcB += 4;
    }

    /* Samples that did not fill a complete group */
    remaining = blockSize & 3;
#else
    /* No unrolling: every sample goes through the trailing loop */
    remaining = blockSize;
#endif

    for (; remaining > 0U; remaining--)
    {
        diff = *pSrcA++ - *pSrcB++;
        acc += diff * diff;
    }

    /* Mean of the squared differences */
    *result = acc / blockSize;
}
#endif /* end of test for vector instruction availability */
/**
@} end of MSE group
*/

@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_f64.c
* Description: Double floating point mean square error
*
* $Date: 05 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two double floating point vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] result mean square error
@return none
*/
/*
 * Double-precision mean square error.
 *
 * Sums (A[i] - B[i])^2 over the block in sample order and divides the sum
 * by blockSize. With ARM_MATH_LOOPUNROLL the bulk of the block is handled
 * two samples per iteration; the trailing loop handles an odd last sample.
 *
 * pSrcA, pSrcB : input vectors, blockSize samples each
 * blockSize    : number of samples
 * result       : receives the mean square error
 */
void arm_mse_f64(
    const float64_t * pSrcA,
    const float64_t * pSrcB,
          uint32_t    blockSize,
          float64_t * result)
{
    uint32_t remaining;          /* Samples left for the trailing loop */
    float64_t diff;              /* Difference of the current sample pair */
    float64_t acc = 0.0;         /* Running sum of squared differences */

#if defined (ARM_MATH_LOOPUNROLL)
    uint32_t pairs;              /* Number of complete 2-sample groups */

    for (pairs = blockSize >> 1; pairs > 0U; pairs--)
    {
        diff = pSrcA[0] - pSrcB[0];
        acc += diff * diff;

        diff = pSrcA[1] - pSrcB[1];
        acc += diff * diff;

        pSrcA += 2;
        pSrcB += 2;
    }

    /* At most one sample is left */
    remaining = blockSize & 1;
#else
    /* No unrolling: every sample goes through the trailing loop */
    remaining = blockSize;
#endif

    for (; remaining > 0U; remaining--)
    {
        diff = *pSrcA++ - *pSrcB++;
        acc += diff * diff;
    }

    /* Mean of the squared differences */
    *result = acc / blockSize;
}
/**
@} end of MSE group
*/

@ -0,0 +1,175 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_q15.c
* Description: Mean square error between two Q15 vectors
*
* $Date: 04 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two Q15 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) Q15 variant.
 *
 * Both inputs are shifted right by one bit before the saturating subtract;
 * the squared differences are accumulated in a 64-bit sum, averaged over
 * blockSize, shifted right by 13 and saturated to 16 bits.
 *
 * pSrcA, pSrcB : input vectors, blockSize samples each
 * blockSize    : number of samples
 * pResult      : receives the mean square error
 */
void arm_mse_q15(
    const q15_t * pSrcA,
    const q15_t * pSrcB,
          uint32_t blockSize,
          q15_t * pResult)
{
    uint32_t blkCnt;             /* Loop counter */
    q15x8_t vecSrcA, vecSrcB;
    q63_t sum = 0LL;

    /* Main loop: 8 samples per iteration */
    blkCnt = blockSize >> 3U;
    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vecSrcA = vshrq(vecSrcA, 1);
        vecSrcB = vshrq(vecSrcB, 1);

        vecSrcA = vqsubq(vecSrcA, vecSrcB);

        /* Multiply-accumulate across lanes into the 64-bit sum */
        sum = vmlaldavaq(sum, vecSrcA, vecSrcA);

        blkCnt--;
        pSrcA += 8;
        pSrcB += 8;
    }

    /* Tail: remaining (blockSize & 7) samples */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /*
         * Predicated (zeroing) loads: only the blkCnt active lanes are read,
         * so the tail never touches memory past the end of the inputs.
         * Inactive lanes are zero and are ignored by the predicated
         * accumulate below.
         */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);

        vecSrcA = vshrq(vecSrcA, 1);
        vecSrcB = vshrq(vecSrcB, 1);

        vecSrcA = vqsubq(vecSrcA, vecSrcB);

        sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0);
    }

    /* Store result in q15 format (the cast applies before the shift) */
    *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
}
#else
/*
 * Scalar Q15 variant.
 *
 * Each input sample is shifted right by one bit before the subtraction, the
 * difference is saturated to 16 bits and its square is added to a 64-bit
 * accumulator. The accumulator is averaged over blockSize, shifted right by
 * 13 and saturated to 16 bits. With ARM_MATH_LOOPUNROLL the bulk of the
 * block is handled four samples per iteration.
 *
 * pSrcA, pSrcB : input vectors, blockSize samples each
 * blockSize    : number of samples
 * pResult      : receives the mean square error
 */
void arm_mse_q15(
    const q15_t * pSrcA,
    const q15_t * pSrcB,
          uint32_t blockSize,
          q15_t * pResult)
{
    uint32_t remaining;          /* Samples left for the trailing loop */
    q63_t acc = 0;               /* Running sum of squared differences */
    q31_t delta;                 /* Difference of the two halved samples */
    q31_t mean;                  /* Accumulator divided by blockSize */
    q15_t diff;                  /* Difference after 16-bit saturation */

#if defined (ARM_MATH_LOOPUNROLL)
    uint32_t groups;             /* Number of complete 4-sample groups */

    for (groups = blockSize >> 2U; groups > 0U; groups--)
    {
        delta = (q31_t) (pSrcA[0] >> 1) - (q31_t) (pSrcB[0] >> 1);
        diff = (q15_t) __SSAT(delta, 16);
        acc += (q63_t)((q31_t) diff * diff);

        delta = (q31_t) (pSrcA[1] >> 1) - (q31_t) (pSrcB[1] >> 1);
        diff = (q15_t) __SSAT(delta, 16);
        acc += (q63_t)((q31_t) diff * diff);

        delta = (q31_t) (pSrcA[2] >> 1) - (q31_t) (pSrcB[2] >> 1);
        diff = (q15_t) __SSAT(delta, 16);
        acc += (q63_t)((q31_t) diff * diff);

        delta = (q31_t) (pSrcA[3] >> 1) - (q31_t) (pSrcB[3] >> 1);
        diff = (q15_t) __SSAT(delta, 16);
        acc += (q63_t)((q31_t) diff * diff);

        pSrcA += 4;
        pSrcB += 4;
    }

    /* Samples that did not fill a complete group */
    remaining = blockSize & 0x3U;
#else
    /* No unrolling: every sample goes through the trailing loop */
    remaining = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        delta = (q31_t) (pSrcA[0] >> 1) - (q31_t) (pSrcB[0] >> 1);
        diff = (q15_t) __SSAT(delta, 16);
        acc += (q63_t)((q31_t) diff * diff);

        pSrcA++;
        pSrcB++;
    }

    /* Average, then rescale and saturate to q15 */
    mean = (q31_t) (acc / blockSize);
    *pResult = (q15_t) __SSAT(mean >> 13, 16);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of MSE group
*/

@ -0,0 +1,176 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_q31.c
* Description: Mean square error between two Q31 vectors
*
* $Date: 04 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two Q31 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) Q31 variant.
 *
 * Both inputs are shifted right by one bit before the saturating subtract;
 * the squared differences are accumulated in a 64-bit sum, averaged over
 * blockSize and shifted right by 21.
 *
 * NOTE(review): the scalar variant shifts each product by 14 and the mean
 * by 15, whereas this path only shifts the mean by 21; presumably
 * vrmlaldavhaq already drops the low 8 bits of the products - confirm
 * against the intrinsic's documentation.
 *
 * pSrcA, pSrcB : input vectors, blockSize samples each
 * blockSize    : number of samples
 * pResult      : receives the mean square error
 */
void arm_mse_q31(
    const q31_t * pSrcA,
    const q31_t * pSrcB,
          uint32_t blockSize,
          q31_t * pResult)
{
    uint32_t blkCnt;             /* Loop counter */
    q31x4_t vecSrcA, vecSrcB;
    q63_t sum = 0LL;

    /* Main loop: 4 samples per iteration */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vecSrcA = vshrq(vecSrcA, 1);
        vecSrcB = vshrq(vecSrcB, 1);

        vecSrcA = vqsubq(vecSrcA, vecSrcB);

        /* Multiply-accumulate across lanes into the 64-bit sum */
        sum = vrmlaldavhaq(sum, vecSrcA, vecSrcA);

        blkCnt--;
        pSrcA += 4;
        pSrcB += 4;
    }

    /* Tail: remaining (blockSize & 3) samples */
    blkCnt = blockSize & 3;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /*
         * Predicated (zeroing) loads: only the blkCnt active lanes are read,
         * so the tail never touches memory past the end of the inputs.
         * Inactive lanes are zero and are ignored by the predicated
         * accumulate below.
         */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);

        vecSrcA = vshrq(vecSrcA, 1);
        vecSrcB = vshrq(vecSrcB, 1);

        vecSrcA = vqsubq(vecSrcA, vecSrcB);

        sum = vrmlaldavhaq_p(sum, vecSrcA, vecSrcA, p0);
    }

    /* Store result in q31 format */
    *pResult = (q31_t) ((sum / blockSize)>>21);
}
#else
/*
 * Scalar Q31 variant.
 *
 * Each input sample is shifted right by one bit, the two halves are
 * subtracted with __QSUB, and the 64-bit square of the difference is
 * shifted right by 14 before being added to the accumulator. The
 * accumulator is averaged over blockSize and shifted right by a further
 * 15 bits. With ARM_MATH_LOOPUNROLL the bulk of the block is handled four
 * samples per iteration.
 *
 * pSrcA, pSrcB : input vectors, blockSize samples each
 * blockSize    : number of samples
 * pResult      : receives the mean square error
 */
void arm_mse_q31(
    const q31_t * pSrcA,
    const q31_t * pSrcB,
          uint32_t blockSize,
          q31_t * pResult)
{
    uint32_t remaining;          /* Samples left for the trailing loop */
    q63_t acc = 0;               /* Running sum of scaled squared differences */
    q31_t halfA, halfB;          /* Input samples after the one-bit shift */
    q31_t diff;                  /* Difference of the halved samples */

#if defined (ARM_MATH_LOOPUNROLL)
    uint32_t groups;             /* Number of complete 4-sample groups */

    for (groups = blockSize >> 2U; groups > 0U; groups--)
    {
        halfA = pSrcA[0] >> 1;
        halfB = pSrcB[0] >> 1;
        diff = __QSUB(halfA, halfB);
        acc += ((q63_t) diff * diff) >> 14U;

        halfA = pSrcA[1] >> 1;
        halfB = pSrcB[1] >> 1;
        diff = __QSUB(halfA, halfB);
        acc += ((q63_t) diff * diff) >> 14U;

        halfA = pSrcA[2] >> 1;
        halfB = pSrcB[2] >> 1;
        diff = __QSUB(halfA, halfB);
        acc += ((q63_t) diff * diff) >> 14U;

        halfA = pSrcA[3] >> 1;
        halfB = pSrcB[3] >> 1;
        diff = __QSUB(halfA, halfB);
        acc += ((q63_t) diff * diff) >> 14U;

        pSrcA += 4;
        pSrcB += 4;
    }

    /* Samples that did not fill a complete group */
    remaining = blockSize & 0x3U;
#else
    /* No unrolling: every sample goes through the trailing loop */
    remaining = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        halfA = pSrcA[0] >> 1;
        halfB = pSrcB[0] >> 1;
        diff = __QSUB(halfA, halfB);
        acc += ((q63_t) diff * diff) >> 14U;

        pSrcA++;
        pSrcB++;
    }

    /* Average, then rescale to q31 */
    *pResult = (q31_t) ((acc / blockSize) >> 15);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of MSE group
*/

@ -33,14 +33,14 @@
*/
/**
@defgroup mse Mean Square Error
@defgroup MSE Mean Square Error
Calculates the mean square error between two vectors.
*/
/**
@addtogroup mse
@addtogroup MSE
@{
*/
@ -48,11 +48,10 @@
@brief Mean square error between two Q7 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_mse_q7(
const q7_t * pSrcA,
@ -63,14 +62,16 @@ void arm_mse_q7(
uint32_t blkCnt; /* loop counters */
q7x16_t vecSrcA,vecSrcB;
q31_t sum = 0LL;
q7_t inA,inB;
/* Compute 16 outputs at a time */
blkCnt = blockSize >> 4U;
while (blkCnt > 0U)
{
vecSrcA = vldrbq_s8(pSrcA);
vecSrcB = vldrbq_s8(pSrcB);
vecSrcA = vld1q(pSrcA);
vecSrcB = vld1q(pSrcB);
vecSrcA = vshrq(vecSrcA,1);
vecSrcB = vshrq(vecSrcB,1);
vecSrcA = vqsubq(vecSrcA,vecSrcB);
/*
@ -87,23 +88,21 @@ void arm_mse_q7(
* tail
*/
blkCnt = blockSize & 0xF;
while (blkCnt > 0U)
if (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
inA = *pSrcA++;
inB = *pSrcB++;
mve_pred16_t p0 = vctp8q(blkCnt);
vecSrcA = vld1q(pSrcA);
vecSrcB = vld1q(pSrcB);
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
vecSrcA = vshrq(vecSrcA,1);
vecSrcB = vshrq(vecSrcB,1);
sum += ((q15_t) inA * inA);
vecSrcA = vqsubq(vecSrcA,vecSrcB);
/* Decrement loop counter */
blkCnt--;
sum = vmladavaq_p(sum, vecSrcA, vecSrcA, p0);
}
*pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>7, 8);
*pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);
}
#else
void arm_mse_q7(
@ -116,10 +115,6 @@ void arm_mse_q7(
q31_t sum = 0; /* Temporary result storage */
q7_t inA,inB; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
q31_t inA32,inB32; /* Temporary variable to store packed input value */
q31_t in1, in2; /* Temporary variables to store input value */
#endif
#if defined (ARM_MATH_LOOPUNROLL)
@ -128,42 +123,25 @@ void arm_mse_q7(
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
#if defined (ARM_MATH_DSP)
inA32 = read_q7x4_ia ((q7_t **) &pSrcA);
inB32 = read_q7x4_ia ((q7_t **) &pSrcB);
inA32 = __QSUB8(inA32, inB32);
in1 = __SXTB16(__ROR(inA32, 8));
in2 = __SXTB16(inA32);
/* calculate power and accumulate to accumulator */
sum = __SMLAD(in1, in1, sum);
sum = __SMLAD(in2, in2, sum);
#else
inA = *pSrcA++;
inB = *pSrcB++;
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA);
inA = *pSrcA++;
inB = *pSrcB++;
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA);
inA = *pSrcA++;
inB = *pSrcB++;
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA);
inA = *pSrcA++;
inB = *pSrcB++;
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA);
#endif /* #if defined (ARM_MATH_DSP) */
/* Decrement loop counter */
blkCnt--;
@ -181,11 +159,8 @@ void arm_mse_q7(
while (blkCnt > 0U)
{
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
/* Compute Power and store result in a temporary variable, sum. */
inA = *pSrcA++;
inB = *pSrcB++;
inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA);
@ -195,10 +170,10 @@ void arm_mse_q7(
}
/* Store result in q7 format */
*pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>7, 8);;
*pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of power group
@} end of MSE group
*/

@ -477,7 +477,8 @@ def writeTests(config,nb,format):
# So new tests have to be added after existing ones
def writeNewsTests(config,nb,format):
NBSAMPLES = 300
#config.setOverwrite(True)
if format==Tools.F16:
config.setOverwrite(True)
data1=np.random.randn(NBSAMPLES)
data1 = Tools.normalize(data1)
@ -491,7 +492,7 @@ def writeNewsTests(config,nb,format):
config.writeInput(2, data2,"InputNew")
nb=generateOperatorTests(config,nb,format,data1,data2,mseTest,"MSEVals")
#config.setOverwrite(False)
config.setOverwrite(False)
def generateBenchmark(config,format):

@ -1,8 +1,8 @@
H
3
// 4
0x0004
// 4
0x0004
// 4
0x0004
// 6
0x0006
// 6
0x0006
// 18
0x0012

@ -1,8 +1,8 @@
H
3
// 0.423138
0x36c5
// 0.423138
0x36c5
// 0.423138
0x36c5
// 0.640755
0x3920
// 0.640755
0x3920
// 0.887109
0x3b19

@ -2,7 +2,7 @@ H
3
// 0
0x0000
// 15
0x000F
// 15
0x000F
// 7
0x0007
// 19
0x0013

@ -1,8 +1,8 @@
H
3
// 0.027578
0x270f
// 0.007974
0x2015
// 0.007974
0x2015
// 0.107198
0x2edc
// 0.021092
0x2566
// 0.002011
0x181e

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
H
4
// 0.038705
0x28f4
// 0.092517
0x2dec
// 0.106867
0x2ed7
// 0.225679
0x3339
// 0.211855
0x32c8
// 0.182973
0x31db
// 0.268630
0x344c
// 0.234421
0x3380

@ -1,8 +1,8 @@
H
3
// 1
0x0001
// 1
0x0001
// 1
0x0001
// 0
0x0000
// 7
0x0007
// 7
0x0007

@ -1,8 +1,8 @@
W
3
// 0.476185
0x3ef3ce78
// 0.476185
0x3ef3ce78
// 0.476185
0x3ef3ce78
// 0.725166
0x3f39a47a
// 0.817687
0x3f5153ed
// 0.817687
0x3f5153ed

@ -1,8 +1,8 @@
H
3
// 0
0x0000
// 7
0x0007
// 7
0x0007
// 1
0x0001
// 5
0x0005
// 9
0x0009

@ -1,8 +1,8 @@
W
3
// 0.184919
0x3e3d5b69
// 0.008792
0x3c100d1c
// 0.008792
0x3c100d1c
// 0.198876
0x3e4ba63c
// 0.035481
0x3d1154a3
// 0.034200
0x3d0c1510

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
W
4
// 0.125231
0x3e003c73
// 0.122919
0x3dfbbceb
// 0.145740
0x3e153cd2
// 0.189820
0x3e426031
// 0.072747
0x3d94fc3e
// 0.176808
0x3e350d0d
// 0.207669
0x3e54a726
// 0.183645
0x3e3c0d87

@ -2,7 +2,7 @@ H
3
// 1
0x0001
// 2
0x0002
// 2
0x0002
// 3
0x0003
// 3
0x0003

@ -1,8 +1,8 @@
D
3
// 0.203055
0x3fc9fdb6e0c81ee0
// 0.360222
0x3fd70de0df777efb
// 0.360222
0x3fd70de0df777efb
// 0.579795
0x3fe28dad67519d3d
// 0.783610
0x3fe91356237f16f6
// 0.783610
0x3fe91356237f16f6

@ -4,5 +4,5 @@ H
0x0000
// 0
0x0000
// 0
0x0000
// 4
0x0004

@ -1,8 +1,8 @@
D
3
// 0.003692
0x3f6e3f80ef9e8a83
// 0.003692
0x3f6e3f80ef9e8a83
// 0.003692
0x3f6e3f80ef9e8a83
// 0.310923
0x3fd3e6286ed8195c
// 0.310923
0x3fd3e6286ed8195c
// 0.150640
0x3fc34828d25e0053

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
D
4
// 0.001072
0x3f518f8a7ed015a2
// 0.073015
0x3fb2b11b5caa023a
// 0.060567
0x3faf02a5beb935ad
// 0.198414
0x3fc9659ffa60ff3b
// 0.221944
0x3fcc68ab519cbb08
// 0.487606
0x3fdf34ef9e2840ea
// 0.411797
0x3fda5ae1181a5066
// 0.186577
0x3fc7e1bdbcffc958

@ -1,8 +1,8 @@
H
3
// 4
0x0004
// 4
0x0004
// 18
0x0012
// 5
0x0005
// 15
0x000F
// 15
0x000F

@ -1,8 +1,8 @@
H
3
// 0.540886
0x453C
// 0.540886
0x453C
// 0.701466
0x59CA
// 0.511444
0x4177
// 0.572485
0x4947
// 0.572485
0x4947

@ -1,8 +1,8 @@
H
3
// 6
0x0006
// 6
0x0006
// 6
0x0006
// 1
0x0001
// 1
0x0001
// 1
0x0001

@ -1,8 +1,8 @@
H
3
// 0.003012
0x0063
// 0.003012
0x0063
// 0.003012
0x0063
// 0.065882
0x086F
// 0.065882
0x086F
// 0.065882
0x086F

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
H
4
// 0.291384
0x254C
// 0.326840
0x29D6
// 0.266990
0x222D
// 0.278624
0x23AA
// 0.123046
0x0FC0
// 0.134261
0x112F
// 0.135165
0x114D
// 0.237464
0x1E65

@ -1,8 +1,8 @@
H
3
// 1
0x0001
// 3
0x0003
// 8
0x0008
// 2
0x0002
// 7
0x0007
// 7
0x0007

@ -1,8 +1,8 @@
W
3
// 0.352374
0x2D1A96B5
// 0.530170
0x43DC9BE7
// 0.634745
0x513F5458
// 0.254671
0x20990B68
// 0.516980
0x422C699D
// 0.516980
0x422C699D

@ -2,7 +2,7 @@ H
3
// 0
0x0000
// 7
0x0007
// 7
0x0007
// 4
0x0004
// 4
0x0004

@ -1,8 +1,8 @@
W
3
// 0.132805
0x10FFBE95
// 0.003898
0x007FB95F
// 0.003898
0x007FB95F
// 0.053227
0x06D0231F
// 0.003305
0x006C4DD3
// 0.003305
0x006C4DD3

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
W
4
// 0.066580
0x0885AD96
// 0.089078
0x0B66E9B3
// 0.168307
0x158B15E2
// 0.196400
0x19239FC7
// 0.153783
0x13AF2B40
// 0.209919
0x1ADE9F11
// 0.155268
0x13DFD01C
// 0.248101
0x1FC1C512

@ -1,10 +1,10 @@
H
4
// 7
0x0007
// 7
0x0007
// 46
0x002E
// 1
0x0001
// 25
0x0019
// 25
0x0019
// 279
0x0117

@ -1,10 +1,10 @@
B
4
// 0.807620
// 0.619484
0x4F
// 0.802797
0x67
// 0.807620
// 0.802797
0x67
// 0.984827
0x7E
// 0.900000
0x73

@ -1,10 +1,10 @@
H
4
// 13
0x000D
// 13
0x000D
// 13
0x000D
// 8
0x0008
// 18
0x0012
// 18
0x0012
// 279
0x0117

@ -1,10 +1,10 @@
B
4
// 0.008109
0x01
// 0.008109
0x01
// 0.008109
// 0.008779
0x01
// 0.000193
0x00
// 0.000193
0x00
// 0.000000
0x00

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
B
4
// 0.191272
0x18
// 0.159547
0x14
// 0.205092
0x1A
// 0.257902
0x21
// 0.092336
0x0C
// 0.121537
0x10
// 0.162974
0x15
// 0.148534
0x13

@ -441,7 +441,28 @@ a double precision computation.
ASSERT_REL_ERROR(ref,output,REL_ERROR);
}
void StatsTestsF16::test_mse_f16()
{
const float16_t *inpA = inputA.ptr();
const float16_t *inpB = inputB.ptr();
float16_t result;
float16_t *refp = ref.ptr();
float16_t *outp = output.ptr();
arm_mse_f16(inpA,inpB,
inputA.nbSamples(),
&result);
outp[0] = result;
ASSERT_SNR(result,refp[this->refOffset],(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(result,refp[this->refOffset],(float16_t)REL_ERROR);
}
void StatsTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
{
@ -1032,6 +1053,58 @@ a double precision computation.
refOffset = 2;
}
break;
case StatsTestsF16::TEST_MSE_F16_49:
{
inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,7);
inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,7);
ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
refOffset = 0;
}
break;
case StatsTestsF16::TEST_MSE_F16_50:
{
inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,16);
inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,16);
ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
refOffset = 1;
}
break;
case StatsTestsF16::TEST_MSE_F16_51:
{
inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,23);
inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,23);
ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
refOffset = 2;
}
break;
case StatsTestsF16::TEST_MSE_F16_52:
{
inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,100);
inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,100);
ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
refOffset = 3;
}
break;
}
}

@ -436,6 +436,29 @@ a double precision computation.
}
void StatsTestsF32::test_mse_f32()
{
const float32_t *inpA = inputA.ptr();
const float32_t *inpB = inputB.ptr();
float32_t result;
float32_t *refp = ref.ptr();
float32_t *outp = output.ptr();
arm_mse_f32(inpA,inpB,
inputA.nbSamples(),
&result);
outp[0] = result;
ASSERT_SNR(result,refp[this->refOffset],(float32_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(result,refp[this->refOffset],(float32_t)REL_ERROR);
}
void StatsTestsF32::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
{
@ -1027,6 +1050,58 @@ a double precision computation.
}
break;
case StatsTestsF32::TEST_MSE_F32_49:
{
inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,3);
inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,3);
ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
refOffset = 0;
}
break;
case StatsTestsF32::TEST_MSE_F32_50:
{
inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,8);
inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,8);
ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
refOffset = 1;
}
break;
case StatsTestsF32::TEST_MSE_F32_51:
{
inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,11);
inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,11);
ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
refOffset = 2;
}
break;
case StatsTestsF32::TEST_MSE_F32_52:
{
inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,100);
inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,100);
ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
refOffset = 3;
}
break;
}

@ -439,6 +439,29 @@ a double precision computation.
*/
void StatsTestsF64::test_mse_f64()
{
const float64_t *inpA = inputA.ptr();
const float64_t *inpB = inputB.ptr();
float64_t result;
float64_t *refp = ref.ptr();
float64_t *outp = output.ptr();
arm_mse_f64(inpA,inpB,
inputA.nbSamples(),
&result);
outp[0] = result;
ASSERT_SNR(result,refp[this->refOffset],(float64_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(result,refp[this->refOffset],(float64_t)REL_ERROR);
}
void StatsTestsF64::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
{
(void)paramsArgs;
@ -1030,6 +1053,58 @@ a double precision computation.
}
break;
case StatsTestsF64::TEST_MSE_F64_49:
{
inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,2);
inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,2);
ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
refOffset = 0;
}
break;
case StatsTestsF64::TEST_MSE_F64_50:
{
inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,4);
inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,4);
ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
refOffset = 1;
}
break;
case StatsTestsF64::TEST_MSE_F64_51:
{
inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,5);
inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,5);
ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
refOffset = 2;
}
break;
case StatsTestsF64::TEST_MSE_F64_52:
{
inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,100);
inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,100);
ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
refOffset = 3;
}
break;
}

@ -6,6 +6,8 @@
//#include <cstdio>
#define SNR_THRESHOLD 50
#define SNR_THRESHOLD_MSE 50
/*
Reference patterns are generated with
@ -13,6 +15,8 @@ a double precision computation.
*/
#define ABS_ERROR_Q15 ((q15_t)100)
#define ABS_ERROR_Q15_MSE ((q15_t)100)
#define ABS_ERROR_Q63 (1<<17)
void StatsTestsQ15::test_max_q15()
@ -310,6 +314,29 @@ a double precision computation.
}
void StatsTestsQ15::test_mse_q15()
{
const q15_t *inpA = inputA.ptr();
const q15_t *inpB = inputB.ptr();
q15_t result;
q15_t *refp = ref.ptr();
q15_t *outp = output.ptr();
arm_mse_q15(inpA,inpB,
inputA.nbSamples(),
&result);
outp[0] = result;
ASSERT_SNR(result,refp[this->refOffset],(float32_t)SNR_THRESHOLD_MSE);
ASSERT_NEAR_EQ(result,refp[this->refOffset],(q15_t)ABS_ERROR_Q15_MSE);
}
void StatsTestsQ15::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
{
@ -808,6 +835,58 @@ a double precision computation.
}
break;
case StatsTestsQ15::TEST_MSE_Q15_40:
{
inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,7);
inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,7);
ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
refOffset = 0;
}
break;
case StatsTestsQ15::TEST_MSE_Q15_41:
{
inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,16);
inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,16);
ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
refOffset = 1;
}
break;
case StatsTestsQ15::TEST_MSE_Q15_42:
{
inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,23);
inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,23);
ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
refOffset = 2;
}
break;
case StatsTestsQ15::TEST_MSE_Q15_43:
{
inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,100);
inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,100);
ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
refOffset = 3;
}
break;
}

@ -6,6 +6,8 @@
//#include <cstdio>
#define SNR_THRESHOLD 100
#define SNR_THRESHOLD_MSE 100
/*
Reference patterns are generated with
@ -13,6 +15,8 @@ a double precision computation.
*/
#define ABS_ERROR_Q31 ((q31_t)(100))
#define ABS_ERROR_Q31_MSE ((q31_t)(100))
#define ABS_ERROR_Q63 ((q63_t)(1<<18))
void StatsTestsQ31::test_max_q31()
@ -309,6 +313,29 @@ a double precision computation.
}
void StatsTestsQ31::test_mse_q31()
{
const q31_t *inpA = inputA.ptr();
const q31_t *inpB = inputB.ptr();
q31_t result;
q31_t *refp = ref.ptr();
q31_t *outp = output.ptr();
arm_mse_q31(inpA,inpB,
inputA.nbSamples(),
&result);
outp[0] = result;
ASSERT_SNR(result,refp[this->refOffset],(float32_t)SNR_THRESHOLD_MSE);
ASSERT_NEAR_EQ(result,refp[this->refOffset],(q31_t)ABS_ERROR_Q31_MSE);
}
void StatsTestsQ31::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
@ -808,6 +835,58 @@ a double precision computation.
}
break;
case StatsTestsQ31::TEST_MSE_Q31_40:
{
inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,3);
inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,3);
ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
refOffset = 0;
}
break;
case StatsTestsQ31::TEST_MSE_Q31_41:
{
inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,8);
inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,8);
ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
refOffset = 1;
}
break;
case StatsTestsQ31::TEST_MSE_Q31_42:
{
inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,11);
inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,11);
ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
refOffset = 2;
}
break;
case StatsTestsQ31::TEST_MSE_Q31_43:
{
inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,100);
inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,100);
ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
refOffset = 3;
}
break;
}

@ -6,7 +6,7 @@
//#include <cstdio>
#define SNR_THRESHOLD 20
#define SNR_THRESHOLD_MSE 14
#define SNR_THRESHOLD_MSE 20
/*

@ -16,6 +16,7 @@ group Root {
Pattern INPUT1_F64_ID : Input1_f64.txt
Pattern INPUTNEW1_F64_ID : InputNew1_f64.txt
Pattern INPUTNEW2_F64_ID : InputNew2_f64.txt
Pattern INPUT2_F64_ID : Input2_f64.txt
Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -52,6 +53,8 @@ group Root {
Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt
Pattern ABSMINVALS_F64_ID : AbsMinVals27_f64.txt
Pattern MSE_F64_ID : MSEVals28_f64.txt
Output OUT_F64_ID : Output
Output OUT_S16_ID : Index
@ -122,6 +125,11 @@ group Root {
Test nb=2n arm_absmin_no_idx_f64:test_absmin_no_idx_f64
Test nb=2n+1 arm_absmin_no_idx_f64:test_absmin_no_idx_f64
Test nb=2 arm_mse_f64:test_mse_f64
Test nb=2n arm_mse_f64:test_mse_f64
Test nb=2n+1 arm_mse_f64:test_mse_f64
Test long arm_mse_f64:test_mse_f64
}
@ -133,6 +141,7 @@ group Root {
Pattern INPUT1_F32_ID : Input1_f32.txt
Pattern INPUTNEW1_F32_ID : InputNew1_f32.txt
Pattern INPUTNEW2_F32_ID : InputNew2_f32.txt
Pattern INPUT2_F32_ID : Input2_f32.txt
Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -169,6 +178,8 @@ group Root {
Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt
Pattern ABSMINVALS_F32_ID : AbsMinVals27_f32.txt
Pattern MSE_F32_ID : MSEVals28_f32.txt
Output OUT_F32_ID : Output
Output OUT_S16_ID : Index
@ -231,13 +242,18 @@ group Root {
Test nb=4n arm_min_no_idx_f32:test_min_no_idx_f32
Test nb=4n+1 arm_min_no_idx_f32:test_min_no_idx_f32
Test nb=2 arm_absmax_no_idx_f32:test_absmax_no_idx_f32
Test nb=2n arm_absmax_no_idx_f32:test_absmax_no_idx_f32
Test nb=2n+1 arm_absmax_no_idx_f32:test_absmax_no_idx_f32
Test nb=3 arm_absmax_no_idx_f32:test_absmax_no_idx_f32
Test nb=4n arm_absmax_no_idx_f32:test_absmax_no_idx_f32
Test nb=4n+1 arm_absmax_no_idx_f32:test_absmax_no_idx_f32
Test nb=3 arm_absmin_no_idx_f32:test_absmin_no_idx_f32
Test nb=4n arm_absmin_no_idx_f32:test_absmin_no_idx_f32
Test nb=4n+1 arm_absmin_no_idx_f32:test_absmin_no_idx_f32
Test nb=2 arm_absmin_no_idx_f32:test_absmin_no_idx_f32
Test nb=2n arm_absmin_no_idx_f32:test_absmin_no_idx_f32
Test nb=2n+1 arm_absmin_no_idx_f32:test_absmin_no_idx_f32
Test nb=3 arm_mse_f32:test_mse_f32
Test nb=4n arm_mse_f32:test_mse_f32
Test nb=4n+1 arm_mse_f32:test_mse_f32
Test long arm_mse_f32:test_mse_f32
}
@ -252,6 +268,7 @@ group Root {
Pattern INPUT1_Q31_ID : Input1_q31.txt
Pattern INPUTNEW1_Q31_ID : InputNew1_q31.txt
Pattern INPUTNEW2_Q31_ID : InputNew2_q31.txt
Pattern INPUT2_Q31_ID : Input2_q31.txt
Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -270,6 +287,8 @@ group Root {
Pattern ABSMININDEXES_S16_ID : AbsMinIndexes9_s16.txt
Pattern ABSMINVALS_Q31_ID : AbsMinVals9_q31.txt
Pattern MSE_Q31_ID : MSEVals10_q31.txt
Output OUT_Q31_ID : Output
Output OUT_Q63_ID : Output
Output OUT_S16_ID : Index
@ -320,13 +339,18 @@ group Root {
Test nb=4n arm_min_no_idx_q31:test_min_no_idx_q31
Test nb=4n+1 arm_min_no_idx_q31:test_min_no_idx_q31
Test nb=2 arm_absmax_no_idx_q31:test_absmax_no_idx_q31
Test nb=2n arm_absmax_no_idx_q31:test_absmax_no_idx_q31
Test nb=2n+1 arm_absmax_no_idx_q31:test_absmax_no_idx_q31
Test nb=3 arm_absmax_no_idx_q31:test_absmax_no_idx_q31
Test nb=4n arm_absmax_no_idx_q31:test_absmax_no_idx_q31
Test nb=4n+1 arm_absmax_no_idx_q31:test_absmax_no_idx_q31
Test nb=3 arm_absmin_no_idx_q31:test_absmin_no_idx_q31
Test nb=4n arm_absmin_no_idx_q31:test_absmin_no_idx_q31
Test nb=4n+1 arm_absmin_no_idx_q31:test_absmin_no_idx_q31
Test nb=2 arm_absmin_no_idx_q31:test_absmin_no_idx_q31
Test nb=2n arm_absmin_no_idx_q31:test_absmin_no_idx_q31
Test nb=2n+1 arm_absmin_no_idx_q31:test_absmin_no_idx_q31
Test nb=3 arm_mse_q31:test_mse_q31
Test nb=4n arm_mse_q31:test_mse_q31
Test nb=4n+1 arm_mse_q31:test_mse_q31
Test long arm_mse_q31:test_mse_q31
}
@ -338,6 +362,7 @@ group Root {
Pattern INPUT1_Q15_ID : Input1_q15.txt
Pattern INPUTNEW1_Q15_ID : InputNew1_q15.txt
Pattern INPUTNEW2_Q15_ID : InputNew2_q15.txt
Pattern INPUT2_Q15_ID : Input2_q15.txt
Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -356,7 +381,7 @@ group Root {
Pattern ABSMININDEXES_S16_ID : AbsMinIndexes9_s16.txt
Pattern ABSMINVALS_Q15_ID : AbsMinVals9_q15.txt
Pattern MSE_Q15_ID : MSEVals10_q15.txt
Output OUT_Q15_ID : Output
Output OUT_Q63_ID : Output
@ -392,29 +417,34 @@ group Root {
Test nb=8n arm_var_q15:test_var_q15
Test nb=8n+1 arm_var_q15:test_var_q15
Test nb=3 arm_absmax_q15:test_absmax_q15
Test nb=4n arm_absmax_q15:test_absmax_q15
Test nb=4n+1 arm_absmax_q15:test_absmax_q15
Test nb=7 arm_absmax_q15:test_absmax_q15
Test nb=8n arm_absmax_q15:test_absmax_q15
Test nb=8n+1 arm_absmax_q15:test_absmax_q15
Test nb=7 arm_absmin_q15:test_absmin_q15
Test nb=8n arm_absmin_q15:test_absmin_q15
Test nb=8n+1 arm_absmin_q15:test_absmin_q15
Test nb=3 arm_absmin_q15:test_absmin_q15
Test nb=4n arm_absmin_q15:test_absmin_q15
Test nb=4n+1 arm_absmin_q15:test_absmin_q15
Test nb=7 arm_max_no_idx_q15:test_max_no_idx_q15
Test nb=8n arm_max_no_idx_q15:test_max_no_idx_q15
Test nb=8n+1 arm_max_no_idx_q15:test_max_no_idx_q15
Test nb=3 arm_max_no_idx_q15:test_max_no_idx_q15
Test nb=4n arm_max_no_idx_q15:test_max_no_idx_q15
Test nb=4n+1 arm_max_no_idx_q15:test_max_no_idx_q15
Test nb=7 arm_min_no_idx_q15:test_min_no_idx_q15
Test nb=8n arm_min_no_idx_q15:test_min_no_idx_q15
Test nb=8n+1 arm_min_no_idx_q15:test_min_no_idx_q15
Test nb=3 arm_min_no_idx_q15:test_min_no_idx_q15
Test nb=4n arm_min_no_idx_q15:test_min_no_idx_q15
Test nb=4n+1 arm_min_no_idx_q15:test_min_no_idx_q15
Test nb=7 arm_absmax_no_idx_q15:test_absmax_no_idx_q15
Test nb=8n arm_absmax_no_idx_q15:test_absmax_no_idx_q15
Test nb=8n+1 arm_absmax_no_idx_q15:test_absmax_no_idx_q15
Test nb=2 arm_absmax_no_idx_q15:test_absmax_no_idx_q15
Test nb=2n arm_absmax_no_idx_q15:test_absmax_no_idx_q15
Test nb=2n+1 arm_absmax_no_idx_q15:test_absmax_no_idx_q15
Test nb=7 arm_absmin_no_idx_q15:test_absmin_no_idx_q15
Test nb=8n arm_absmin_no_idx_q15:test_absmin_no_idx_q15
Test nb=8n+1 arm_absmin_no_idx_q15:test_absmin_no_idx_q15
Test nb=2 arm_absmin_no_idx_q15:test_absmin_no_idx_q15
Test nb=2n arm_absmin_no_idx_q15:test_absmin_no_idx_q15
Test nb=2n+1 arm_absmin_no_idx_q15:test_absmin_no_idx_q15
Test nb=7 arm_mse_q15:test_mse_q15
Test nb=8n arm_mse_q15:test_mse_q15
Test nb=8n+1 arm_mse_q15:test_mse_q15
Test long arm_mse_q15:test_mse_q15
}
@ -479,32 +509,32 @@ group Root {
Test big index arm_max_q7:test_max_q7
Test big index arm_min_q7:test_min_q7
Test nb=3 arm_absmax_q7:test_absmax_q7
Test nb=4n arm_absmax_q7:test_absmax_q7
Test nb=4n+1 arm_absmax_q7:test_absmax_q7
Test nb=15 arm_absmax_q7:test_absmax_q7
Test nb=16n arm_absmax_q7:test_absmax_q7
Test nb=16n+1 arm_absmax_q7:test_absmax_q7
Test nb=3 arm_absmin_q7:test_absmin_q7
Test nb=4n arm_absmin_q7:test_absmin_q7
Test nb=4n+1 arm_absmin_q7:test_absmin_q7
Test nb=15 arm_absmin_q7:test_absmin_q7
Test nb=16n arm_absmin_q7:test_absmin_q7
Test nb=16n+1 arm_absmin_q7:test_absmin_q7
Test big index arm_absmax_q7:test_absmax_q7
Test big index arm_absmin_q7:test_absmin_q7
Test nb=3 arm_max_no_idx_q7:test_max_no_idx_q7
Test nb=4n arm_max_no_idx_q7:test_max_no_idx_q7
Test nb=4n+1 arm_max_no_idx_q7:test_max_no_idx_q7
Test nb=15 arm_max_no_idx_q7:test_max_no_idx_q7
Test nb=16n arm_max_no_idx_q7:test_max_no_idx_q7
Test nb=16n+1 arm_max_no_idx_q7:test_max_no_idx_q7
Test nb=3 arm_min_no_idx_q7:test_min_no_idx_q7
Test nb=4n arm_min_no_idx_q7:test_min_no_idx_q7
Test nb=4n+1 arm_min_no_idx_q7:test_min_no_idx_q7
Test nb=15 arm_min_no_idx_q7:test_min_no_idx_q7
Test nb=16n arm_min_no_idx_q7:test_min_no_idx_q7
Test nb=16n+1 arm_min_no_idx_q7:test_min_no_idx_q7
Test nb=2 arm_absmax_no_idx_q7:test_absmax_no_idx_q7
Test nb=2n arm_absmax_no_idx_q7:test_absmax_no_idx_q7
Test nb=2n+1 arm_absmax_no_idx_q7:test_absmax_no_idx_q7
Test nb=15 arm_absmax_no_idx_q7:test_absmax_no_idx_q7
Test nb=16n arm_absmax_no_idx_q7:test_absmax_no_idx_q7
Test nb=16n+1 arm_absmax_no_idx_q7:test_absmax_no_idx_q7
Test nb=2 arm_absmin_no_idx_q7:test_absmin_no_idx_q7
Test nb=2n arm_absmin_no_idx_q7:test_absmin_no_idx_q7
Test nb=2n+1 arm_absmin_no_idx_q7:test_absmin_no_idx_q7
Test nb=15 arm_absmin_no_idx_q7:test_absmin_no_idx_q7
Test nb=16n arm_absmin_no_idx_q7:test_absmin_no_idx_q7
Test nb=16n+1 arm_absmin_no_idx_q7:test_absmin_no_idx_q7
Test nb=15 arm_mse_q7:test_mse_q7
Test nb=16n arm_mse_q7:test_mse_q7

@ -15,6 +15,7 @@ group Root {
Pattern INPUT1_F16_ID : Input1_f16.txt
Pattern INPUTNEW1_F16_ID : InputNew1_f16.txt
Pattern INPUTNEW2_F16_ID : InputNew2_f16.txt
Pattern INPUT2_F16_ID : Input2_f16.txt
Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -51,6 +52,8 @@ group Root {
Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt
Pattern ABSMINVALS_F16_ID : AbsMinVals27_f16.txt
Pattern MSE_F16_ID : MSEVals28_f16.txt
Output OUT_F16_ID : Output
Output OUT_S16_ID : Index
Output TMP_F16_ID : Temp
@ -100,25 +103,30 @@ group Root {
Test stability arm_std_f16:test_std_stability_f16
Test nb=3 arm_absmax_f16:test_absmax_f16
Test nb=4n arm_absmax_f16:test_absmax_f16
Test nb=4n+1 arm_absmax_f16:test_absmax_f16
Test nb=7 arm_absmax_f16:test_absmax_f16
Test nb=8n arm_absmax_f16:test_absmax_f16
Test nb=8n+1 arm_absmax_f16:test_absmax_f16
Test nb=3 arm_absmin_f16:test_absmin_f16
Test nb=4n arm_absmin_f16:test_absmin_f16
Test nb=4n+1 arm_absmin_f16:test_absmin_f16
Test nb=7 arm_absmin_f16:test_absmin_f16
Test nb=8n arm_absmin_f16:test_absmin_f16
Test nb=8n+1 arm_absmin_f16:test_absmin_f16
Test nb=7 arm_min_no_idx_f16:test_min_no_idx_f16
Test nb=8n arm_min_no_idx_f16:test_min_no_idx_f16
Test nb=8n+1 arm_min_no_idx_f16:test_min_no_idx_f16
Test nb=2 arm_absmax_no_idx_f16:test_absmax_no_idx_f16
Test nb=2n arm_absmax_no_idx_f16:test_absmax_no_idx_f16
Test nb=2n+1 arm_absmax_no_idx_f16:test_absmax_no_idx_f16
Test nb=7 arm_absmax_no_idx_f16:test_absmax_no_idx_f16
Test nb=8n arm_absmax_no_idx_f16:test_absmax_no_idx_f16
Test nb=8n+1 arm_absmax_no_idx_f16:test_absmax_no_idx_f16
Test nb=7 arm_absmin_no_idx_f16:test_absmin_no_idx_f16
Test nb=8n arm_absmin_no_idx_f16:test_absmin_no_idx_f16
Test nb=8n+1 arm_absmin_no_idx_f16:test_absmin_no_idx_f16
Test nb=2 arm_absmin_no_idx_f16:test_absmin_no_idx_f16
Test nb=2n arm_absmin_no_idx_f16:test_absmin_no_idx_f16
Test nb=2n+1 arm_absmin_no_idx_f16:test_absmin_no_idx_f16
Test nb=7 arm_mse_f16:test_mse_f16
Test nb=8n arm_mse_f16:test_mse_f16
Test nb=8n+1 arm_mse_f16:test_mse_f16
Test long arm_mse_f16:test_mse_f16
}
}
}

Loading…
Cancel
Save