CMSIS-DSP: Mean square error for q15, q31, f16, f32, f64.

Reworked q7 to have a bit more accuracy.
pull/19/head
Christophe Favergeon 4 years ago
parent 47a987217f
commit 8dcdb350a4

@ -910,6 +910,66 @@ void arm_mse_q7(
uint32_t blockSize, uint32_t blockSize,
q7_t * pResult); q7_t * pResult);
/**
@brief Mean square error between two Q15 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
void arm_mse_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
uint32_t blockSize,
q15_t * pResult);
/**
@brief Mean square error between two Q31 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
void arm_mse_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
uint32_t blockSize,
q31_t * pResult);
/**
@brief Mean square error between two single precision float vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
void arm_mse_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t * pResult);
/**
@brief Mean square error between two double precision float vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
void arm_mse_f64(
const float64_t * pSrcA,
const float64_t * pSrcB,
uint32_t blockSize,
float64_t * pResult);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

@ -243,6 +243,21 @@ float16_t arm_kullback_leibler_f16(const float16_t * pSrcA
uint32_t blockSize, uint32_t blockSize,
float16_t *pResult); float16_t *pResult);
/**
@brief Mean square error between two half precision float vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
void arm_mse_f16(
const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t blockSize,
float16_t * pResult);
#endif /*defined(ARM_FLOAT16_SUPPORTED)*/ #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
#ifdef __cplusplus #ifdef __cplusplus
} }

@ -81,6 +81,11 @@ target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q31.c) target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q7.c) target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_q7.c) target_sources(CMSISDSPStatistics PRIVATE arm_mse_q7.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_q15.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_q31.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_f16.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_f32.c)
target_sources(CMSISDSPStatistics PRIVATE arm_mse_f64.c)
configLib(CMSISDSPStatistics ${ROOT}) configLib(CMSISDSPStatistics ${ROOT})
configDsp(CMSISDSPStatistics ${ROOT}) configDsp(CMSISDSPStatistics ${ROOT})

@ -94,3 +94,7 @@
#include "arm_absmin_no_idx_q31.c" #include "arm_absmin_no_idx_q31.c"
#include "arm_absmin_no_idx_q7.c" #include "arm_absmin_no_idx_q7.c"
#include "arm_mse_q7.c" #include "arm_mse_q7.c"
#include "arm_mse_q15.c"
#include "arm_mse_q31.c"
#include "arm_mse_f32.c"
#include "arm_mse_f64.c"

@ -43,3 +43,4 @@
#include "arm_absmin_f16.c" #include "arm_absmin_f16.c"
#include "arm_absmax_no_idx_f16.c" #include "arm_absmax_no_idx_f16.c"
#include "arm_absmin_no_idx_f16.c" #include "arm_absmin_no_idx_f16.c"
#include "arm_mse_f16.c"

@ -0,0 +1,203 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_f16.c
* Description: Half floating point mean square error
*
* $Date: 05 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two half-precision floating-point vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] result mean square error
@return none
*/
#if !defined(ARM_MATH_AUTOVECTORIZE)
#if defined(ARM_MATH_MVE_FLOAT16)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation of the half-precision mean square error.
 *
 * Eight samples are processed per iteration: the element-wise difference
 * A[i] - B[i] is squared and accumulated lane by lane in vecSum. The final
 * 1 to 7 samples are handled with a tail predicate. The accumulated lanes
 * are then reduced with vecAddAcrossF16Mve (presumably a horizontal add,
 * see arm_helium_utils.h) and divided by blockSize.
 *
 * pSrcA     first input vector  (blockSize samples)
 * pSrcB     second input vector (blockSize samples)
 * blockSize number of samples in each input vector
 * result    receives the mean square error
 */
void arm_mse_f16(
const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t blockSize,
float16_t * result)
{
    float16x8_t vecA, vecB;
    float16x8_t vecSum;
    uint32_t blkCnt;
    _Float16 sum = 0.0f16;

    vecSum = vdupq_n_f16(0.0f16);

    /* Main loop: full vectors of 8 samples */
    blkCnt = (blockSize) >> 3;
    while (blkCnt > 0U)
    {
        vecA = vld1q(pSrcA);
        pSrcA += 8;
        vecB = vld1q(pSrcB);
        pSrcB += 8;

        vecA = vsubq(vecA, vecB);
        vecSum = vfmaq(vecSum, vecA, vecA);

        /* Decrement the vector loop counter */
        blkCnt --;
    }

    /* Tail: remaining 1 to 7 samples */
    blkCnt = (blockSize) & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /*
         * Predicated (zeroing) loads: lanes beyond the end of the input
         * buffers are not read from memory, so the tail never accesses
         * data past pSrcA[blockSize-1] / pSrcB[blockSize-1].
         */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);

        vecA = vsubq(vecA, vecB);
        /* Only the active lanes are accumulated */
        vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
    }

    sum = vecAddAcrossF16Mve(vecSum);

    /* Store result in destination buffer */
    *result = (_Float16)sum / (_Float16)blockSize;
}
#endif
#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
#if defined(ARM_FLOAT16_SUPPORTED)
#if (!defined(ARM_MATH_MVE_FLOAT16)) || defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Scalar implementation of the half-precision mean square error.
 *
 * Accumulates (A[i] - B[i])^2 over all samples in _Float16 arithmetic and
 * divides the total by blockSize. With ARM_MATH_LOOPUNROLL the bulk of the
 * input is consumed eight samples per iteration; the generic loop below
 * handles whatever is left.
 *
 * pSrcA     first input vector  (blockSize samples)
 * pSrcB     second input vector (blockSize samples)
 * blockSize number of samples in each input vector
 * result    receives the mean square error
 */
void arm_mse_f16(
const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t blockSize,
float16_t * result)
{
    uint32_t remaining;       /* Samples (or groups of samples) still to process */
    _Float16 diff;            /* Difference of the current pair of samples */
    _Float16 acc = 0.0f16;    /* Running sum of squared differences */

#if defined (ARM_MATH_LOOPUNROLL)
    /* Unrolled part: 8 samples per iteration */
    for (remaining = (blockSize) >> 3; remaining > 0U; remaining--)
    {
        diff = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
        acc += (_Float16)diff * (_Float16)diff;

        diff = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
        acc += (_Float16)diff * (_Float16)diff;
    }

    /* Samples left over after the unrolled part */
    remaining = (blockSize) & 7;
#else
    /* No unrolling: every sample goes through the generic loop */
    remaining = blockSize;
#endif

    for (; remaining > 0U; remaining--)
    {
        diff = (_Float16)*pSrcA++ - (_Float16)*pSrcB++;
        acc += (_Float16)diff * (_Float16)diff;
    }

    /* Mean of the squared differences */
    *result = (_Float16)acc / (_Float16)blockSize;
}
#endif /* end of test for vector instruction availability */
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
/**
@} end of MSE group
*/

@ -0,0 +1,246 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_f32.c
* Description: Floating point mean square error
*
* $Date: 05 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two floating point vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] result mean square error
@return none
*/
#if !defined(ARM_MATH_AUTOVECTORIZE)
#if defined(ARM_MATH_MVEF)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation of the single-precision mean square error.
 *
 * Four samples are processed per iteration: the element-wise difference
 * A[i] - B[i] is squared and accumulated lane by lane in vecSum. The final
 * 1 to 3 samples are handled with a tail predicate. The accumulated lanes
 * are then reduced with vecAddAcrossF32Mve (presumably a horizontal add,
 * see arm_helium_utils.h) and divided by blockSize.
 *
 * pSrcA     first input vector  (blockSize samples)
 * pSrcB     second input vector (blockSize samples)
 * blockSize number of samples in each input vector
 * result    receives the mean square error
 */
void arm_mse_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t * result)
{
    float32x4_t vecA, vecB;
    float32x4_t vecSum;
    uint32_t blkCnt;
    float32_t sum = 0.0f;

    vecSum = vdupq_n_f32(0.0f);

    /* Compute 4 outputs at a time */
    blkCnt = (blockSize) >> 2;
    while (blkCnt > 0U)
    {
        vecA = vld1q(pSrcA);
        pSrcA += 4;
        vecB = vld1q(pSrcB);
        pSrcB += 4;

        vecA = vsubq(vecA, vecB);
        vecSum = vfmaq(vecSum, vecA, vecA);

        /* Decrement the vector loop counter */
        blkCnt --;
    }

    /* Tail: remaining 1 to 3 samples */
    blkCnt = (blockSize) & 3;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /*
         * Predicated (zeroing) loads: lanes beyond the end of the input
         * buffers are not read from memory, so the tail never accesses
         * data past pSrcA[blockSize-1] / pSrcB[blockSize-1].
         */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);

        vecA = vsubq(vecA, vecB);
        /* Only the active lanes are accumulated */
        vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
    }

    sum = vecAddAcrossF32Mve(vecSum);

    /* Store result in destination buffer */
    *result = sum / blockSize;
}
#endif
#if defined(ARM_MATH_NEON)
/*
 * Neon implementation of the single-precision mean square error.
 *
 * Four samples are processed per iteration: the element-wise difference
 * A[i] - B[i] is squared and accumulated lane by lane in vecSum. The four
 * lanes are then added together, the remaining 0 to 3 samples are
 * accumulated with scalar code, and the total is divided by blockSize.
 *
 * pSrcA     first input vector  (blockSize samples)
 * pSrcB     second input vector (blockSize samples)
 * blockSize number of samples in each input vector
 * result    receives the mean square error
 */
void arm_mse_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t * result)
{
    float32x4_t vecA, vecB;
    float32x4_t vecSum;
    uint32_t blkCnt;
    float32_t sum = 0.0f;
    float32_t inA, inB;   /* Scalar temporaries used by the tail loop */

    vecSum = vdupq_n_f32(0.0f);
#if !defined(__aarch64__)
    f32x2_t tmp = vdup_n_f32(0.0f);
#endif

    /* Compute 4 outputs at a time */
    blkCnt = (blockSize) >> 2;
    while (blkCnt > 0U)
    {
        vecA = vld1q_f32(pSrcA);
        pSrcA += 4;
        vecB = vld1q_f32(pSrcB);
        pSrcB += 4;

        vecA = vsubq_f32(vecA, vecB);
        vecSum = vfmaq_f32(vecSum, vecA, vecA);

        /* Decrement the vector loop counter */
        blkCnt --;
    }

    /* Add the four accumulator lanes together */
#if defined(__aarch64__)
    sum = vpadds_f32(vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum)));
#else
    tmp = vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum));
    sum = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1);
#endif

    /* Tail: remaining 0 to 3 samples, processed one at a time */
    blkCnt = (blockSize) & 3;
    while (blkCnt > 0U)
    {
        /* Accumulate the squared difference of the current pair of samples */
        inA = *pSrcA++;
        inB = *pSrcB++;
        inA = inA - inB;
        sum += inA * inA;

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Store result in destination buffer */
    *result = sum / blockSize;
}
#endif
#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
#if (!defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON)) || defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Scalar implementation of the single-precision mean square error.
 *
 * Accumulates (A[i] - B[i])^2 over all samples and divides the total by
 * blockSize. With ARM_MATH_LOOPUNROLL the bulk of the input is consumed
 * four samples per iteration; the generic loop below handles the
 * remaining samples.
 *
 * pSrcA     first input vector  (blockSize samples)
 * pSrcB     second input vector (blockSize samples)
 * blockSize number of samples in each input vector
 * result    receives the mean square error
 */
void arm_mse_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t * result)
{
    uint32_t remaining;      /* Samples (or groups of samples) still to process */
    float32_t diff;          /* Difference of the current pair of samples */
    float32_t acc = 0.0f;    /* Running sum of squared differences */

#if defined (ARM_MATH_LOOPUNROLL)
    /* Unrolled part: 4 samples per iteration */
    for (remaining = (blockSize) >> 2; remaining > 0U; remaining--)
    {
        diff = *pSrcA++ - *pSrcB++;
        acc += diff * diff;

        diff = *pSrcA++ - *pSrcB++;
        acc += diff * diff;

        diff = *pSrcA++ - *pSrcB++;
        acc += diff * diff;

        diff = *pSrcA++ - *pSrcB++;
        acc += diff * diff;
    }

    /* Samples left over after the unrolled part (0 to 3) */
    remaining = (blockSize) & 3;
#else
    /* No unrolling: every sample goes through the generic loop */
    remaining = blockSize;
#endif

    for (; remaining > 0U; remaining--)
    {
        diff = *pSrcA++ - *pSrcB++;
        acc += diff * diff;
    }

    /* Mean of the squared differences */
    *result = acc / blockSize;
}
#endif /* end of test for vector instruction availability */
/**
@} end of MSE group
*/

@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_f64.c
* Description: Double floating point mean square error
*
* $Date: 05 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two double-precision floating-point vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] result mean square error
@return none
*/
/*
 * Double-precision mean square error.
 *
 * Accumulates (A[i] - B[i])^2 over all samples and divides the total by
 * blockSize. With ARM_MATH_LOOPUNROLL the input is consumed two samples
 * per iteration, with the generic loop picking up a possible odd sample.
 *
 * pSrcA     first input vector  (blockSize samples)
 * pSrcB     second input vector (blockSize samples)
 * blockSize number of samples in each input vector
 * result    receives the mean square error
 */
void arm_mse_f64(
const float64_t * pSrcA,
const float64_t * pSrcB,
uint32_t blockSize,
float64_t * result)
{
    uint32_t remaining;     /* Samples (or pairs of samples) still to process */
    float64_t diff;         /* Difference of the current pair of samples */
    float64_t acc = 0.0;    /* Running sum of squared differences */

#if defined (ARM_MATH_LOOPUNROLL)
    /* Unrolled part: 2 samples per iteration */
    for (remaining = (blockSize) >> 1; remaining > 0U; remaining--)
    {
        diff = *pSrcA++ - *pSrcB++;
        acc += diff * diff;

        diff = *pSrcA++ - *pSrcB++;
        acc += diff * diff;
    }

    /* At most one sample is left after the unrolled part */
    remaining = (blockSize) & 1;
#else
    /* No unrolling: every sample goes through the generic loop */
    remaining = blockSize;
#endif

    for (; remaining > 0U; remaining--)
    {
        diff = *pSrcA++ - *pSrcB++;
        acc += diff * diff;
    }

    /* Mean of the squared differences */
    *result = acc / blockSize;
}
/**
@} end of MSE group
*/

@ -0,0 +1,175 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_q15.c
* Description: Mean square error between two Q15 vectors
*
* $Date: 04 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two Q15 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation of the Q15 mean square error.
 *
 * Each input is shifted right by one bit before the saturating
 * subtraction, the squared differences are accumulated in a 64-bit
 * accumulator, and the mean is shifted right by 13 bits and saturated
 * to 16 bits for the Q15 result.
 *
 * An empty input (blockSize == 0) yields a result of 0 instead of
 * performing an integer division by zero.
 *
 * pSrcA     first input vector  (blockSize samples)
 * pSrcB     second input vector (blockSize samples)
 * blockSize number of samples in each input vector
 * pResult   receives the mean square error
 */
void arm_mse_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
uint32_t blockSize,
q15_t * pResult)
{
    uint32_t blkCnt; /* loop counters */
    q15x8_t vecSrcA,vecSrcB;
    q63_t sum = 0LL;

    /* Integer division by zero is undefined: define the empty case */
    if (blockSize == 0U)
    {
        *pResult = 0;
        return;
    }

    /* Main loop: full vectors of 8 samples */
    blkCnt = blockSize >> 3U;
    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vecSrcA = vshrq(vecSrcA,1);
        vecSrcB = vshrq(vecSrcB,1);

        vecSrcA = vqsubq(vecSrcA,vecSrcB);
        /*
         * sum lanes
         */
        sum = vmlaldavaq(sum, vecSrcA, vecSrcA);

        blkCnt--;
        pSrcA += 8;
        pSrcB += 8;
    }

    /*
     * tail: remaining 1 to 7 samples
     */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /*
         * Predicated (zeroing) loads: lanes beyond the end of the input
         * buffers are not read from memory.
         */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);

        vecSrcA = vshrq(vecSrcA,1);
        vecSrcB = vshrq(vecSrcB,1);

        vecSrcA = vqsubq(vecSrcA,vecSrcB);

        /* Only the active lanes contribute to the sum */
        sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0);
    }

    /* Store result in q15 format */
    *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
}
#else
/*
 * Scalar implementation of the Q15 mean square error.
 *
 * Each input is shifted right by one bit before the saturating
 * subtraction, the squared differences are accumulated in a 64-bit
 * accumulator, and the mean is shifted right by 13 bits and saturated
 * to 16 bits for the Q15 result.
 *
 * An empty input (blockSize == 0) yields a result of 0 instead of
 * performing an integer division by zero.
 *
 * pSrcA     first input vector  (blockSize samples)
 * pSrcB     second input vector (blockSize samples)
 * blockSize number of samples in each input vector
 * pResult   receives the mean square error
 */
void arm_mse_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
uint32_t blockSize,
q15_t * pResult)
{
    uint32_t blkCnt; /* Loop counter */
    q63_t sum = 0; /* Sum of squared differences */
    q15_t inA,inB; /* Temporary variables to store input values */

    /* Integer division by zero is undefined: define the empty case */
    if (blockSize == 0U)
    {
        *pResult = 0;
        return;
    }

#if defined (ARM_MATH_LOOPUNROLL)
    /* Loop unrolling: Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        inA = *pSrcA++ >> 1;
        inB = *pSrcB++ >> 1;
        inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
        sum += (q63_t)((q31_t) inA * inA);

        inA = *pSrcA++ >> 1;
        inB = *pSrcB++ >> 1;
        inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
        sum += (q63_t)((q31_t) inA * inA);

        inA = *pSrcA++ >> 1;
        inB = *pSrcB++ >> 1;
        inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
        sum += (q63_t)((q31_t) inA * inA);

        inA = *pSrcA++ >> 1;
        inB = *pSrcB++ >> 1;
        inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
        sum += (q63_t)((q31_t) inA * inA);

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Loop unrolling: Compute remaining outputs */
    blkCnt = blockSize % 0x4U;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    while (blkCnt > 0U)
    {
        inA = *pSrcA++ >> 1;
        inB = *pSrcB++ >> 1;
        inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
        sum += (q63_t)((q31_t) inA * inA);

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Store result in q15 format */
    *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of MSE group
*/

@ -0,0 +1,176 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mse_q31.c
* Description: Mean square error between two Q31 vectors
*
* $Date: 04 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup MSE
@{
*/
/**
@brief Mean square error between two Q31 vectors.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult mean square error
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation of the Q31 mean square error.
 *
 * Each input is shifted right by one bit before the saturating
 * subtraction. The squared differences are accumulated with
 * vrmlaldavhaq into a 64-bit accumulator (see the MVE intrinsics
 * reference for the scaling applied by this instruction), and the mean
 * is shifted right by 21 bits for the Q31 result.
 *
 * An empty input (blockSize == 0) yields a result of 0 instead of
 * performing an integer division by zero.
 *
 * pSrcA     first input vector  (blockSize samples)
 * pSrcB     second input vector (blockSize samples)
 * blockSize number of samples in each input vector
 * pResult   receives the mean square error
 */
void arm_mse_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
uint32_t blockSize,
q31_t * pResult)
{
    uint32_t blkCnt; /* loop counters */
    q31x4_t vecSrcA,vecSrcB;
    q63_t sum = 0LL;

    /* Integer division by zero is undefined: define the empty case */
    if (blockSize == 0U)
    {
        *pResult = 0;
        return;
    }

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vecSrcA = vshrq(vecSrcA,1);
        vecSrcB = vshrq(vecSrcB,1);

        vecSrcA = vqsubq(vecSrcA,vecSrcB);
        /*
         * sum lanes
         */
        sum = vrmlaldavhaq(sum, vecSrcA, vecSrcA);

        blkCnt--;
        pSrcA += 4;
        pSrcB += 4;
    }

    /*
     * tail: remaining 1 to 3 samples
     */
    blkCnt = blockSize & 3;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /*
         * Predicated (zeroing) loads: lanes beyond the end of the input
         * buffers are not read from memory.
         */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);

        vecSrcA = vshrq(vecSrcA,1);
        vecSrcB = vshrq(vecSrcB,1);

        vecSrcA = vqsubq(vecSrcA,vecSrcB);

        /* Only the active lanes contribute to the sum */
        sum = vrmlaldavhaq_p(sum, vecSrcA, vecSrcA, p0);
    }

    /* Store result in q31 format */
    *pResult = (q31_t) ((sum / blockSize)>>21);
}
#else
/*
 * Scalar implementation of the Q31 mean square error.
 *
 * Each input is shifted right by one bit before the saturating
 * subtraction. Every squared difference is computed in 64 bits and
 * shifted right by 14 bits before being accumulated; the mean is then
 * shifted right by 15 bits for the Q31 result.
 *
 * An empty input (blockSize == 0) yields a result of 0 instead of
 * performing an integer division by zero.
 *
 * pSrcA     first input vector  (blockSize samples)
 * pSrcB     second input vector (blockSize samples)
 * blockSize number of samples in each input vector
 * pResult   receives the mean square error
 */
void arm_mse_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
uint32_t blockSize,
q31_t * pResult)
{
    uint32_t blkCnt; /* Loop counter */
    q63_t sum = 0; /* Sum of scaled squared differences */
    q31_t inA32,inB32; /* Halved input samples, then their difference */

    /* Integer division by zero is undefined: define the empty case */
    if (blockSize == 0U)
    {
        *pResult = 0;
        return;
    }

#if defined (ARM_MATH_LOOPUNROLL)
    /* Loop unrolling: Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        inA32 = *pSrcA++ >> 1;
        inB32 = *pSrcB++ >> 1;
        inA32 = __QSUB(inA32, inB32);
        sum += ((q63_t) inA32 * inA32) >> 14U;

        inA32 = *pSrcA++ >> 1;
        inB32 = *pSrcB++ >> 1;
        inA32 = __QSUB(inA32, inB32);
        sum += ((q63_t) inA32 * inA32) >> 14U;

        inA32 = *pSrcA++ >> 1;
        inB32 = *pSrcB++ >> 1;
        inA32 = __QSUB(inA32, inB32);
        sum += ((q63_t) inA32 * inA32) >> 14U;

        inA32 = *pSrcA++ >> 1;
        inB32 = *pSrcB++ >> 1;
        inA32 = __QSUB(inA32, inB32);
        sum += ((q63_t) inA32 * inA32) >> 14U;

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Loop unrolling: Compute remaining outputs */
    blkCnt = blockSize % 0x4U;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    while (blkCnt > 0U)
    {
        inA32 = *pSrcA++ >> 1;
        inB32 = *pSrcB++ >> 1;
        inA32 = __QSUB(inA32, inB32);
        sum += ((q63_t) inA32 * inA32) >> 14U;

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Store result in q31 format */
    *pResult = (q31_t) ((sum / blockSize)>>15);
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of MSE group
*/

@ -33,14 +33,14 @@
*/ */
/** /**
@defgroup mse Mean Square Error @defgroup MSE Mean Square Error
Calculates the mean square error between two vectors. Calculates the mean square error between two vectors.
*/ */
/** /**
@addtogroup mse @addtogroup MSE
@{ @{
*/ */
@ -51,8 +51,7 @@
@param[in] blockSize number of samples in input vector @param[in] blockSize number of samples in input vector
@param[out] pResult mean square error @param[out] pResult mean square error
@return none @return none
*/
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_mse_q7( void arm_mse_q7(
const q7_t * pSrcA, const q7_t * pSrcA,
@ -63,14 +62,16 @@ void arm_mse_q7(
uint32_t blkCnt; /* loop counters */ uint32_t blkCnt; /* loop counters */
q7x16_t vecSrcA,vecSrcB; q7x16_t vecSrcA,vecSrcB;
q31_t sum = 0LL; q31_t sum = 0LL;
q7_t inA,inB;
/* Compute 16 outputs at a time */ /* Compute 16 outputs at a time */
blkCnt = blockSize >> 4U; blkCnt = blockSize >> 4U;
while (blkCnt > 0U) while (blkCnt > 0U)
{ {
vecSrcA = vldrbq_s8(pSrcA); vecSrcA = vld1q(pSrcA);
vecSrcB = vldrbq_s8(pSrcB); vecSrcB = vld1q(pSrcB);
vecSrcA = vshrq(vecSrcA,1);
vecSrcB = vshrq(vecSrcB,1);
vecSrcA = vqsubq(vecSrcA,vecSrcB); vecSrcA = vqsubq(vecSrcA,vecSrcB);
/* /*
@ -87,23 +88,21 @@ void arm_mse_q7(
* tail * tail
*/ */
blkCnt = blockSize & 0xF; blkCnt = blockSize & 0xF;
while (blkCnt > 0U) if (blkCnt > 0U)
{ {
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ mve_pred16_t p0 = vctp8q(blkCnt);
vecSrcA = vld1q(pSrcA);
vecSrcB = vld1q(pSrcB);
/* Compute Power and store result in a temporary variable, sum. */ vecSrcA = vshrq(vecSrcA,1);
inA = *pSrcA++; vecSrcB = vshrq(vecSrcB,1);
inB = *pSrcB++;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); vecSrcA = vqsubq(vecSrcA,vecSrcB);
sum += ((q15_t) inA * inA);
/* Decrement loop counter */ sum = vmladavaq_p(sum, vecSrcA, vecSrcA, p0);
blkCnt--;
} }
*pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>7, 8); *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);
} }
#else #else
void arm_mse_q7( void arm_mse_q7(
@ -116,10 +115,6 @@ void arm_mse_q7(
q31_t sum = 0; /* Temporary result storage */ q31_t sum = 0; /* Temporary result storage */
q7_t inA,inB; /* Temporary variable to store input value */ q7_t inA,inB; /* Temporary variable to store input value */
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
q31_t inA32,inB32; /* Temporary variable to store packed input value */
q31_t in1, in2; /* Temporary variables to store input value */
#endif
#if defined (ARM_MATH_LOOPUNROLL) #if defined (ARM_MATH_LOOPUNROLL)
@ -128,42 +123,25 @@ void arm_mse_q7(
while (blkCnt > 0U) while (blkCnt > 0U)
{ {
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
/* Compute Power and store result in a temporary variable, sum. */
#if defined (ARM_MATH_DSP)
inA32 = read_q7x4_ia ((q7_t **) &pSrcA);
inB32 = read_q7x4_ia ((q7_t **) &pSrcB);
inA32 = __QSUB8(inA32, inB32);
in1 = __SXTB16(__ROR(inA32, 8));
in2 = __SXTB16(inA32);
/* calculate power and accumulate to accumulator */
sum = __SMLAD(in1, in1, sum);
sum = __SMLAD(in2, in2, sum);
#else
inA = *pSrcA++;
inB = *pSrcB++;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA); sum += ((q15_t) inA * inA);
inA = *pSrcA++; inA = *pSrcA++ >> 1;
inB = *pSrcB++; inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA); sum += ((q15_t) inA * inA);
inA = *pSrcA++; inA = *pSrcA++ >> 1;
inB = *pSrcB++; inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA); sum += ((q15_t) inA * inA);
inA = *pSrcA++; inA = *pSrcA++ >> 1;
inB = *pSrcB++; inB = *pSrcB++ >> 1;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA); sum += ((q15_t) inA * inA);
#endif /* #if defined (ARM_MATH_DSP) */
/* Decrement loop counter */ /* Decrement loop counter */
blkCnt--; blkCnt--;
@ -181,11 +159,8 @@ void arm_mse_q7(
while (blkCnt > 0U) while (blkCnt > 0U)
{ {
/* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ inA = *pSrcA++ >> 1;
inB = *pSrcB++ >> 1;
/* Compute Power and store result in a temporary variable, sum. */
inA = *pSrcA++;
inB = *pSrcB++;
inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8);
sum += ((q15_t) inA * inA); sum += ((q15_t) inA * inA);
@ -195,10 +170,10 @@ void arm_mse_q7(
} }
/* Store result in q7 format */ /* Store result in q7 format */
*pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>7, 8);; *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);;
} }
#endif /* defined(ARM_MATH_MVEI) */ #endif /* defined(ARM_MATH_MVEI) */
/** /**
@} end of power group @} end of MSE group
*/ */

@ -477,7 +477,8 @@ def writeTests(config,nb,format):
# So new tests have to be added after existing ones # So new tests have to be added after existing ones
def writeNewsTests(config,nb,format): def writeNewsTests(config,nb,format):
NBSAMPLES = 300 NBSAMPLES = 300
#config.setOverwrite(True) if format==Tools.F16:
config.setOverwrite(True)
data1=np.random.randn(NBSAMPLES) data1=np.random.randn(NBSAMPLES)
data1 = Tools.normalize(data1) data1 = Tools.normalize(data1)
@ -491,7 +492,7 @@ def writeNewsTests(config,nb,format):
config.writeInput(2, data2,"InputNew") config.writeInput(2, data2,"InputNew")
nb=generateOperatorTests(config,nb,format,data1,data2,mseTest,"MSEVals") nb=generateOperatorTests(config,nb,format,data1,data2,mseTest,"MSEVals")
#config.setOverwrite(False) config.setOverwrite(False)
def generateBenchmark(config,format): def generateBenchmark(config,format):

@ -1,8 +1,8 @@
H H
3 3
// 4 // 6
0x0004 0x0006
// 4 // 6
0x0004 0x0006
// 4 // 18
0x0004 0x0012

@ -1,8 +1,8 @@
H H
3 3
// 0.423138 // 0.640755
0x36c5 0x3920
// 0.423138 // 0.640755
0x36c5 0x3920
// 0.423138 // 0.887109
0x36c5 0x3b19

@ -2,7 +2,7 @@ H
3 3
// 0 // 0
0x0000 0x0000
// 15 // 7
0x000F 0x0007
// 15 // 19
0x000F 0x0013

@ -1,8 +1,8 @@
H H
3 3
// 0.027578 // 0.107198
0x270f 0x2edc
// 0.007974 // 0.021092
0x2015 0x2566
// 0.007974 // 0.002011
0x2015 0x181e

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
H H
4 4
// 0.038705 // 0.211855
0x28f4 0x32c8
// 0.092517 // 0.182973
0x2dec 0x31db
// 0.106867 // 0.268630
0x2ed7 0x344c
// 0.225679 // 0.234421
0x3339 0x3380

@ -1,8 +1,8 @@
H H
3 3
// 1 // 0
0x0001 0x0000
// 1 // 7
0x0001 0x0007
// 1 // 7
0x0001 0x0007

@ -1,8 +1,8 @@
W W
3 3
// 0.476185 // 0.725166
0x3ef3ce78 0x3f39a47a
// 0.476185 // 0.817687
0x3ef3ce78 0x3f5153ed
// 0.476185 // 0.817687
0x3ef3ce78 0x3f5153ed

@ -1,8 +1,8 @@
H H
3 3
// 0 // 1
0x0000 0x0001
// 7 // 5
0x0007 0x0005
// 7 // 9
0x0007 0x0009

@ -1,8 +1,8 @@
W W
3 3
// 0.184919 // 0.198876
0x3e3d5b69 0x3e4ba63c
// 0.008792 // 0.035481
0x3c100d1c 0x3d1154a3
// 0.008792 // 0.034200
0x3c100d1c 0x3d0c1510

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
W W
4 4
// 0.125231 // 0.072747
0x3e003c73 0x3d94fc3e
// 0.122919 // 0.176808
0x3dfbbceb 0x3e350d0d
// 0.145740 // 0.207669
0x3e153cd2 0x3e54a726
// 0.189820 // 0.183645
0x3e426031 0x3e3c0d87

@ -2,7 +2,7 @@ H
3 3
// 1 // 1
0x0001 0x0001
// 2 // 3
0x0002 0x0003
// 2 // 3
0x0002 0x0003

@ -1,8 +1,8 @@
D D
3 3
// 0.203055 // 0.579795
0x3fc9fdb6e0c81ee0 0x3fe28dad67519d3d
// 0.360222 // 0.783610
0x3fd70de0df777efb 0x3fe91356237f16f6
// 0.360222 // 0.783610
0x3fd70de0df777efb 0x3fe91356237f16f6

@ -4,5 +4,5 @@ H
0x0000 0x0000
// 0 // 0
0x0000 0x0000
// 0 // 4
0x0000 0x0004

@ -1,8 +1,8 @@
D D
3 3
// 0.003692 // 0.310923
0x3f6e3f80ef9e8a83 0x3fd3e6286ed8195c
// 0.003692 // 0.310923
0x3f6e3f80ef9e8a83 0x3fd3e6286ed8195c
// 0.003692 // 0.150640
0x3f6e3f80ef9e8a83 0x3fc34828d25e0053

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
D D
4 4
// 0.001072 // 0.221944
0x3f518f8a7ed015a2 0x3fcc68ab519cbb08
// 0.073015 // 0.487606
0x3fb2b11b5caa023a 0x3fdf34ef9e2840ea
// 0.060567 // 0.411797
0x3faf02a5beb935ad 0x3fda5ae1181a5066
// 0.198414 // 0.186577
0x3fc9659ffa60ff3b 0x3fc7e1bdbcffc958

@ -1,8 +1,8 @@
H H
3 3
// 4 // 5
0x0004 0x0005
// 4 // 15
0x0004 0x000F
// 18 // 15
0x0012 0x000F

@ -1,8 +1,8 @@
H H
3 3
// 0.540886 // 0.511444
0x453C 0x4177
// 0.540886 // 0.572485
0x453C 0x4947
// 0.701466 // 0.572485
0x59CA 0x4947

@ -1,8 +1,8 @@
H H
3 3
// 6 // 1
0x0006 0x0001
// 6 // 1
0x0006 0x0001
// 6 // 1
0x0006 0x0001

@ -1,8 +1,8 @@
H H
3 3
// 0.003012 // 0.065882
0x0063 0x086F
// 0.003012 // 0.065882
0x0063 0x086F
// 0.003012 // 0.065882
0x0063 0x086F

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
H H
4 4
// 0.291384 // 0.123046
0x254C 0x0FC0
// 0.326840 // 0.134261
0x29D6 0x112F
// 0.266990 // 0.135165
0x222D 0x114D
// 0.278624 // 0.237464
0x23AA 0x1E65

@ -1,8 +1,8 @@
H H
3 3
// 1 // 2
0x0001 0x0002
// 3 // 7
0x0003 0x0007
// 8 // 7
0x0008 0x0007

@ -1,8 +1,8 @@
W W
3 3
// 0.352374 // 0.254671
0x2D1A96B5 0x20990B68
// 0.530170 // 0.516980
0x43DC9BE7 0x422C699D
// 0.634745 // 0.516980
0x513F5458 0x422C699D

@ -2,7 +2,7 @@ H
3 3
// 0 // 0
0x0000 0x0000
// 7 // 4
0x0007 0x0004
// 7 // 4
0x0007 0x0004

@ -1,8 +1,8 @@
W W
3 3
// 0.132805 // 0.053227
0x10FFBE95 0x06D0231F
// 0.003898 // 0.003305
0x007FB95F 0x006C4DD3
// 0.003898 // 0.003305
0x007FB95F 0x006C4DD3

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
W W
4 4
// 0.066580 // 0.153783
0x0885AD96 0x13AF2B40
// 0.089078 // 0.209919
0x0B66E9B3 0x1ADE9F11
// 0.168307 // 0.155268
0x158B15E2 0x13DFD01C
// 0.196400 // 0.248101
0x19239FC7 0x1FC1C512

@ -1,10 +1,10 @@
H H
4 4
// 7 // 1
0x0007 0x0001
// 7 // 25
0x0007 0x0019
// 46 // 25
0x002E 0x0019
// 279 // 279
0x0117 0x0117

@ -1,10 +1,10 @@
B B
4 4
// 0.807620 // 0.619484
0x4F
// 0.802797
0x67 0x67
// 0.807620 // 0.802797
0x67 0x67
// 0.984827
0x7E
// 0.900000 // 0.900000
0x73 0x73

@ -1,10 +1,10 @@
H H
4 4
// 13 // 8
0x000D 0x0008
// 13 // 18
0x000D 0x0012
// 13 // 18
0x000D 0x0012
// 279 // 279
0x0117 0x0117

@ -1,10 +1,10 @@
B B
4 4
// 0.008109 // 0.008779
0x01
// 0.008109
0x01
// 0.008109
0x01 0x01
// 0.000193
0x00
// 0.000193
0x00
// 0.000000 // 0.000000
0x00 0x00

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,10 +1,10 @@
B B
4 4
// 0.191272 // 0.092336
0x18 0x0C
// 0.159547 // 0.121537
0x14 0x10
// 0.205092 // 0.162974
0x1A 0x15
// 0.257902 // 0.148534
0x21 0x13

@ -441,7 +441,28 @@ a double precision computation.
ASSERT_REL_ERROR(ref,output,REL_ERROR); ASSERT_REL_ERROR(ref,output,REL_ERROR);
} }
void StatsTestsF16::test_mse_f16()
{
const float16_t *inpA = inputA.ptr();
const float16_t *inpB = inputB.ptr();
float16_t result;
float16_t *refp = ref.ptr();
float16_t *outp = output.ptr();
arm_mse_f16(inpA,inpB,
inputA.nbSamples(),
&result);
outp[0] = result;
ASSERT_SNR(result,refp[this->refOffset],(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(result,refp[this->refOffset],(float16_t)REL_ERROR);
}
void StatsTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr) void StatsTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
{ {
@ -1032,6 +1053,58 @@ a double precision computation.
refOffset = 2; refOffset = 2;
} }
break; break;
case StatsTestsF16::TEST_MSE_F16_49:
{
inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,7);
inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,7);
ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
refOffset = 0;
}
break;
case StatsTestsF16::TEST_MSE_F16_50:
{
inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,16);
inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,16);
ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
refOffset = 1;
}
break;
case StatsTestsF16::TEST_MSE_F16_51:
{
inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,23);
inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,23);
ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
refOffset = 2;
}
break;
case StatsTestsF16::TEST_MSE_F16_52:
{
inputA.reload(StatsTestsF16::INPUTNEW1_F16_ID,mgr,100);
inputB.reload(StatsTestsF16::INPUTNEW2_F16_ID,mgr,100);
ref.reload(StatsTestsF16::MSE_F16_ID,mgr);
output.create(1,StatsTestsF16::OUT_F16_ID,mgr);
refOffset = 3;
}
break;
} }
} }

@ -436,6 +436,29 @@ a double precision computation.
} }
void StatsTestsF32::test_mse_f32()
{
const float32_t *inpA = inputA.ptr();
const float32_t *inpB = inputB.ptr();
float32_t result;
float32_t *refp = ref.ptr();
float32_t *outp = output.ptr();
arm_mse_f32(inpA,inpB,
inputA.nbSamples(),
&result);
outp[0] = result;
ASSERT_SNR(result,refp[this->refOffset],(float32_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(result,refp[this->refOffset],(float32_t)REL_ERROR);
}
void StatsTestsF32::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr) void StatsTestsF32::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
{ {
@ -1027,6 +1050,58 @@ a double precision computation.
} }
break; break;
case StatsTestsF32::TEST_MSE_F32_49:
{
inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,3);
inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,3);
ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
refOffset = 0;
}
break;
case StatsTestsF32::TEST_MSE_F32_50:
{
inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,8);
inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,8);
ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
refOffset = 1;
}
break;
case StatsTestsF32::TEST_MSE_F32_51:
{
inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,11);
inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,11);
ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
refOffset = 2;
}
break;
case StatsTestsF32::TEST_MSE_F32_52:
{
inputA.reload(StatsTestsF32::INPUTNEW1_F32_ID,mgr,100);
inputB.reload(StatsTestsF32::INPUTNEW2_F32_ID,mgr,100);
ref.reload(StatsTestsF32::MSE_F32_ID,mgr);
output.create(1,StatsTestsF32::OUT_F32_ID,mgr);
refOffset = 3;
}
break;
} }

@ -439,6 +439,29 @@ a double precision computation.
*/ */
void StatsTestsF64::test_mse_f64()
{
const float64_t *inpA = inputA.ptr();
const float64_t *inpB = inputB.ptr();
float64_t result;
float64_t *refp = ref.ptr();
float64_t *outp = output.ptr();
arm_mse_f64(inpA,inpB,
inputA.nbSamples(),
&result);
outp[0] = result;
ASSERT_SNR(result,refp[this->refOffset],(float64_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(result,refp[this->refOffset],(float64_t)REL_ERROR);
}
void StatsTestsF64::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr) void StatsTestsF64::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
{ {
(void)paramsArgs; (void)paramsArgs;
@ -1030,6 +1053,58 @@ a double precision computation.
} }
break; break;
case StatsTestsF64::TEST_MSE_F64_49:
{
inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,2);
inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,2);
ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
refOffset = 0;
}
break;
case StatsTestsF64::TEST_MSE_F64_50:
{
inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,4);
inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,4);
ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
refOffset = 1;
}
break;
case StatsTestsF64::TEST_MSE_F64_51:
{
inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,5);
inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,5);
ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
refOffset = 2;
}
break;
case StatsTestsF64::TEST_MSE_F64_52:
{
inputA.reload(StatsTestsF64::INPUTNEW1_F64_ID,mgr,100);
inputB.reload(StatsTestsF64::INPUTNEW2_F64_ID,mgr,100);
ref.reload(StatsTestsF64::MSE_F64_ID,mgr);
output.create(1,StatsTestsF64::OUT_F64_ID,mgr);
refOffset = 3;
}
break;
} }

@ -6,6 +6,8 @@
//#include <cstdio> //#include <cstdio>
#define SNR_THRESHOLD 50 #define SNR_THRESHOLD 50
#define SNR_THRESHOLD_MSE 50
/* /*
Reference patterns are generated with Reference patterns are generated with
@ -13,6 +15,8 @@ a double precision computation.
*/ */
#define ABS_ERROR_Q15 ((q15_t)100) #define ABS_ERROR_Q15 ((q15_t)100)
#define ABS_ERROR_Q15_MSE ((q15_t)100)
#define ABS_ERROR_Q63 (1<<17) #define ABS_ERROR_Q63 (1<<17)
void StatsTestsQ15::test_max_q15() void StatsTestsQ15::test_max_q15()
@ -310,6 +314,29 @@ a double precision computation.
} }
void StatsTestsQ15::test_mse_q15()
{
const q15_t *inpA = inputA.ptr();
const q15_t *inpB = inputB.ptr();
q15_t result;
q15_t *refp = ref.ptr();
q15_t *outp = output.ptr();
arm_mse_q15(inpA,inpB,
inputA.nbSamples(),
&result);
outp[0] = result;
ASSERT_SNR(result,refp[this->refOffset],(float32_t)SNR_THRESHOLD_MSE);
ASSERT_NEAR_EQ(result,refp[this->refOffset],(q15_t)ABS_ERROR_Q15_MSE);
}
void StatsTestsQ15::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr) void StatsTestsQ15::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
{ {
@ -808,6 +835,58 @@ a double precision computation.
} }
break; break;
case StatsTestsQ15::TEST_MSE_Q15_40:
{
inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,7);
inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,7);
ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
refOffset = 0;
}
break;
case StatsTestsQ15::TEST_MSE_Q15_41:
{
inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,16);
inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,16);
ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
refOffset = 1;
}
break;
case StatsTestsQ15::TEST_MSE_Q15_42:
{
inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,23);
inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,23);
ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
refOffset = 2;
}
break;
case StatsTestsQ15::TEST_MSE_Q15_43:
{
inputA.reload(StatsTestsQ15::INPUTNEW1_Q15_ID,mgr,100);
inputB.reload(StatsTestsQ15::INPUTNEW2_Q15_ID,mgr,100);
ref.reload(StatsTestsQ15::MSE_Q15_ID,mgr);
output.create(1,StatsTestsQ15::OUT_Q15_ID,mgr);
refOffset = 3;
}
break;
} }

@ -6,6 +6,8 @@
//#include <cstdio> //#include <cstdio>
#define SNR_THRESHOLD 100 #define SNR_THRESHOLD 100
#define SNR_THRESHOLD_MSE 100
/* /*
Reference patterns are generated with Reference patterns are generated with
@ -13,6 +15,8 @@ a double precision computation.
*/ */
#define ABS_ERROR_Q31 ((q31_t)(100)) #define ABS_ERROR_Q31 ((q31_t)(100))
#define ABS_ERROR_Q31_MSE ((q31_t)(100))
#define ABS_ERROR_Q63 ((q63_t)(1<<18)) #define ABS_ERROR_Q63 ((q63_t)(1<<18))
void StatsTestsQ31::test_max_q31() void StatsTestsQ31::test_max_q31()
@ -309,6 +313,29 @@ a double precision computation.
} }
void StatsTestsQ31::test_mse_q31()
{
const q31_t *inpA = inputA.ptr();
const q31_t *inpB = inputB.ptr();
q31_t result;
q31_t *refp = ref.ptr();
q31_t *outp = output.ptr();
arm_mse_q31(inpA,inpB,
inputA.nbSamples(),
&result);
outp[0] = result;
ASSERT_SNR(result,refp[this->refOffset],(float32_t)SNR_THRESHOLD_MSE);
ASSERT_NEAR_EQ(result,refp[this->refOffset],(q31_t)ABS_ERROR_Q31_MSE);
}
void StatsTestsQ31::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr) void StatsTestsQ31::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
@ -808,6 +835,58 @@ a double precision computation.
} }
break; break;
case StatsTestsQ31::TEST_MSE_Q31_40:
{
inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,3);
inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,3);
ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
refOffset = 0;
}
break;
case StatsTestsQ31::TEST_MSE_Q31_41:
{
inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,8);
inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,8);
ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
refOffset = 1;
}
break;
case StatsTestsQ31::TEST_MSE_Q31_42:
{
inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,11);
inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,11);
ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
refOffset = 2;
}
break;
case StatsTestsQ31::TEST_MSE_Q31_43:
{
inputA.reload(StatsTestsQ31::INPUTNEW1_Q31_ID,mgr,100);
inputB.reload(StatsTestsQ31::INPUTNEW2_Q31_ID,mgr,100);
ref.reload(StatsTestsQ31::MSE_Q31_ID,mgr);
output.create(1,StatsTestsQ31::OUT_Q31_ID,mgr);
refOffset = 3;
}
break;
} }

@ -6,7 +6,7 @@
//#include <cstdio> //#include <cstdio>
#define SNR_THRESHOLD 20 #define SNR_THRESHOLD 20
#define SNR_THRESHOLD_MSE 14 #define SNR_THRESHOLD_MSE 20
/* /*

@ -16,6 +16,7 @@ group Root {
Pattern INPUT1_F64_ID : Input1_f64.txt Pattern INPUT1_F64_ID : Input1_f64.txt
Pattern INPUTNEW1_F64_ID : InputNew1_f64.txt Pattern INPUTNEW1_F64_ID : InputNew1_f64.txt
Pattern INPUTNEW2_F64_ID : InputNew2_f64.txt
Pattern INPUT2_F64_ID : Input2_f64.txt Pattern INPUT2_F64_ID : Input2_f64.txt
Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -52,6 +53,8 @@ group Root {
Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt
Pattern ABSMINVALS_F64_ID : AbsMinVals27_f64.txt Pattern ABSMINVALS_F64_ID : AbsMinVals27_f64.txt
Pattern MSE_F64_ID : MSEVals28_f64.txt
Output OUT_F64_ID : Output Output OUT_F64_ID : Output
Output OUT_S16_ID : Index Output OUT_S16_ID : Index
@ -122,6 +125,11 @@ group Root {
Test nb=2n arm_absmin_no_idx_f64:test_absmin_no_idx_f64 Test nb=2n arm_absmin_no_idx_f64:test_absmin_no_idx_f64
Test nb=2n+1 arm_absmin_no_idx_f64:test_absmin_no_idx_f64 Test nb=2n+1 arm_absmin_no_idx_f64:test_absmin_no_idx_f64
Test nb=2 arm_mse_f64:test_mse_f64
Test nb=2n arm_mse_f64:test_mse_f64
Test nb=2n+1 arm_mse_f64:test_mse_f64
Test long arm_mse_f64:test_mse_f64
} }
@ -133,6 +141,7 @@ group Root {
Pattern INPUT1_F32_ID : Input1_f32.txt Pattern INPUT1_F32_ID : Input1_f32.txt
Pattern INPUTNEW1_F32_ID : InputNew1_f32.txt Pattern INPUTNEW1_F32_ID : InputNew1_f32.txt
Pattern INPUTNEW2_F32_ID : InputNew2_f32.txt
Pattern INPUT2_F32_ID : Input2_f32.txt Pattern INPUT2_F32_ID : Input2_f32.txt
Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -169,6 +178,8 @@ group Root {
Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt
Pattern ABSMINVALS_F32_ID : AbsMinVals27_f32.txt Pattern ABSMINVALS_F32_ID : AbsMinVals27_f32.txt
Pattern MSE_F32_ID : MSEVals28_f32.txt
Output OUT_F32_ID : Output Output OUT_F32_ID : Output
Output OUT_S16_ID : Index Output OUT_S16_ID : Index
@ -231,13 +242,18 @@ group Root {
Test nb=4n arm_min_no_idx_f32:test_min_no_idx_f32 Test nb=4n arm_min_no_idx_f32:test_min_no_idx_f32
Test nb=4n+1 arm_min_no_idx_f32:test_min_no_idx_f32 Test nb=4n+1 arm_min_no_idx_f32:test_min_no_idx_f32
Test nb=2 arm_absmax_no_idx_f32:test_absmax_no_idx_f32 Test nb=3 arm_absmax_no_idx_f32:test_absmax_no_idx_f32
Test nb=2n arm_absmax_no_idx_f32:test_absmax_no_idx_f32 Test nb=4n arm_absmax_no_idx_f32:test_absmax_no_idx_f32
Test nb=2n+1 arm_absmax_no_idx_f32:test_absmax_no_idx_f32 Test nb=4n+1 arm_absmax_no_idx_f32:test_absmax_no_idx_f32
Test nb=3 arm_absmin_no_idx_f32:test_absmin_no_idx_f32
Test nb=4n arm_absmin_no_idx_f32:test_absmin_no_idx_f32
Test nb=4n+1 arm_absmin_no_idx_f32:test_absmin_no_idx_f32
Test nb=2 arm_absmin_no_idx_f32:test_absmin_no_idx_f32 Test nb=3 arm_mse_f32:test_mse_f32
Test nb=2n arm_absmin_no_idx_f32:test_absmin_no_idx_f32 Test nb=4n arm_mse_f32:test_mse_f32
Test nb=2n+1 arm_absmin_no_idx_f32:test_absmin_no_idx_f32 Test nb=4n+1 arm_mse_f32:test_mse_f32
Test long arm_mse_f32:test_mse_f32
} }
@ -252,6 +268,7 @@ group Root {
Pattern INPUT1_Q31_ID : Input1_q31.txt Pattern INPUT1_Q31_ID : Input1_q31.txt
Pattern INPUTNEW1_Q31_ID : InputNew1_q31.txt Pattern INPUTNEW1_Q31_ID : InputNew1_q31.txt
Pattern INPUTNEW2_Q31_ID : InputNew2_q31.txt
Pattern INPUT2_Q31_ID : Input2_q31.txt Pattern INPUT2_Q31_ID : Input2_q31.txt
Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -270,6 +287,8 @@ group Root {
Pattern ABSMININDEXES_S16_ID : AbsMinIndexes9_s16.txt Pattern ABSMININDEXES_S16_ID : AbsMinIndexes9_s16.txt
Pattern ABSMINVALS_Q31_ID : AbsMinVals9_q31.txt Pattern ABSMINVALS_Q31_ID : AbsMinVals9_q31.txt
Pattern MSE_Q31_ID : MSEVals10_q31.txt
Output OUT_Q31_ID : Output Output OUT_Q31_ID : Output
Output OUT_Q63_ID : Output Output OUT_Q63_ID : Output
Output OUT_S16_ID : Index Output OUT_S16_ID : Index
@ -320,13 +339,18 @@ group Root {
Test nb=4n arm_min_no_idx_q31:test_min_no_idx_q31 Test nb=4n arm_min_no_idx_q31:test_min_no_idx_q31
Test nb=4n+1 arm_min_no_idx_q31:test_min_no_idx_q31 Test nb=4n+1 arm_min_no_idx_q31:test_min_no_idx_q31
Test nb=2 arm_absmax_no_idx_q31:test_absmax_no_idx_q31 Test nb=3 arm_absmax_no_idx_q31:test_absmax_no_idx_q31
Test nb=2n arm_absmax_no_idx_q31:test_absmax_no_idx_q31 Test nb=4n arm_absmax_no_idx_q31:test_absmax_no_idx_q31
Test nb=2n+1 arm_absmax_no_idx_q31:test_absmax_no_idx_q31 Test nb=4n+1 arm_absmax_no_idx_q31:test_absmax_no_idx_q31
Test nb=3 arm_absmin_no_idx_q31:test_absmin_no_idx_q31
Test nb=4n arm_absmin_no_idx_q31:test_absmin_no_idx_q31
Test nb=4n+1 arm_absmin_no_idx_q31:test_absmin_no_idx_q31
Test nb=2 arm_absmin_no_idx_q31:test_absmin_no_idx_q31 Test nb=3 arm_mse_q31:test_mse_q31
Test nb=2n arm_absmin_no_idx_q31:test_absmin_no_idx_q31 Test nb=4n arm_mse_q31:test_mse_q31
Test nb=2n+1 arm_absmin_no_idx_q31:test_absmin_no_idx_q31 Test nb=4n+1 arm_mse_q31:test_mse_q31
Test long arm_mse_q31:test_mse_q31
} }
@ -338,6 +362,7 @@ group Root {
Pattern INPUT1_Q15_ID : Input1_q15.txt Pattern INPUT1_Q15_ID : Input1_q15.txt
Pattern INPUTNEW1_Q15_ID : InputNew1_q15.txt Pattern INPUTNEW1_Q15_ID : InputNew1_q15.txt
Pattern INPUTNEW2_Q15_ID : InputNew2_q15.txt
Pattern INPUT2_Q15_ID : Input2_q15.txt Pattern INPUT2_Q15_ID : Input2_q15.txt
Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -356,7 +381,7 @@ group Root {
Pattern ABSMININDEXES_S16_ID : AbsMinIndexes9_s16.txt Pattern ABSMININDEXES_S16_ID : AbsMinIndexes9_s16.txt
Pattern ABSMINVALS_Q15_ID : AbsMinVals9_q15.txt Pattern ABSMINVALS_Q15_ID : AbsMinVals9_q15.txt
Pattern MSE_Q15_ID : MSEVals10_q15.txt
Output OUT_Q15_ID : Output Output OUT_Q15_ID : Output
Output OUT_Q63_ID : Output Output OUT_Q63_ID : Output
@ -392,29 +417,34 @@ group Root {
Test nb=8n arm_var_q15:test_var_q15 Test nb=8n arm_var_q15:test_var_q15
Test nb=8n+1 arm_var_q15:test_var_q15 Test nb=8n+1 arm_var_q15:test_var_q15
Test nb=3 arm_absmax_q15:test_absmax_q15 Test nb=7 arm_absmax_q15:test_absmax_q15
Test nb=4n arm_absmax_q15:test_absmax_q15 Test nb=8n arm_absmax_q15:test_absmax_q15
Test nb=4n+1 arm_absmax_q15:test_absmax_q15 Test nb=8n+1 arm_absmax_q15:test_absmax_q15
Test nb=7 arm_absmin_q15:test_absmin_q15
Test nb=8n arm_absmin_q15:test_absmin_q15
Test nb=8n+1 arm_absmin_q15:test_absmin_q15
Test nb=3 arm_absmin_q15:test_absmin_q15 Test nb=7 arm_max_no_idx_q15:test_max_no_idx_q15
Test nb=4n arm_absmin_q15:test_absmin_q15 Test nb=8n arm_max_no_idx_q15:test_max_no_idx_q15
Test nb=4n+1 arm_absmin_q15:test_absmin_q15 Test nb=8n+1 arm_max_no_idx_q15:test_max_no_idx_q15
Test nb=3 arm_max_no_idx_q15:test_max_no_idx_q15 Test nb=7 arm_min_no_idx_q15:test_min_no_idx_q15
Test nb=4n arm_max_no_idx_q15:test_max_no_idx_q15 Test nb=8n arm_min_no_idx_q15:test_min_no_idx_q15
Test nb=4n+1 arm_max_no_idx_q15:test_max_no_idx_q15 Test nb=8n+1 arm_min_no_idx_q15:test_min_no_idx_q15
Test nb=3 arm_min_no_idx_q15:test_min_no_idx_q15 Test nb=7 arm_absmax_no_idx_q15:test_absmax_no_idx_q15
Test nb=4n arm_min_no_idx_q15:test_min_no_idx_q15 Test nb=8n arm_absmax_no_idx_q15:test_absmax_no_idx_q15
Test nb=4n+1 arm_min_no_idx_q15:test_min_no_idx_q15 Test nb=8n+1 arm_absmax_no_idx_q15:test_absmax_no_idx_q15
Test nb=2 arm_absmax_no_idx_q15:test_absmax_no_idx_q15 Test nb=7 arm_absmin_no_idx_q15:test_absmin_no_idx_q15
Test nb=2n arm_absmax_no_idx_q15:test_absmax_no_idx_q15 Test nb=8n arm_absmin_no_idx_q15:test_absmin_no_idx_q15
Test nb=2n+1 arm_absmax_no_idx_q15:test_absmax_no_idx_q15 Test nb=8n+1 arm_absmin_no_idx_q15:test_absmin_no_idx_q15
Test nb=2 arm_absmin_no_idx_q15:test_absmin_no_idx_q15 Test nb=7 arm_mse_q15:test_mse_q15
Test nb=2n arm_absmin_no_idx_q15:test_absmin_no_idx_q15 Test nb=8n arm_mse_q15:test_mse_q15
Test nb=2n+1 arm_absmin_no_idx_q15:test_absmin_no_idx_q15 Test nb=8n+1 arm_mse_q15:test_mse_q15
Test long arm_mse_q15:test_mse_q15
} }
@ -479,32 +509,32 @@ group Root {
Test big index arm_max_q7:test_max_q7 Test big index arm_max_q7:test_max_q7
Test big index arm_min_q7:test_min_q7 Test big index arm_min_q7:test_min_q7
Test nb=3 arm_absmax_q7:test_absmax_q7 Test nb=15 arm_absmax_q7:test_absmax_q7
Test nb=4n arm_absmax_q7:test_absmax_q7 Test nb=16n arm_absmax_q7:test_absmax_q7
Test nb=4n+1 arm_absmax_q7:test_absmax_q7 Test nb=16n+1 arm_absmax_q7:test_absmax_q7
Test nb=3 arm_absmin_q7:test_absmin_q7 Test nb=15 arm_absmin_q7:test_absmin_q7
Test nb=4n arm_absmin_q7:test_absmin_q7 Test nb=16n arm_absmin_q7:test_absmin_q7
Test nb=4n+1 arm_absmin_q7:test_absmin_q7 Test nb=16n+1 arm_absmin_q7:test_absmin_q7
Test big index arm_absmax_q7:test_absmax_q7 Test big index arm_absmax_q7:test_absmax_q7
Test big index arm_absmin_q7:test_absmin_q7 Test big index arm_absmin_q7:test_absmin_q7
Test nb=3 arm_max_no_idx_q7:test_max_no_idx_q7 Test nb=15 arm_max_no_idx_q7:test_max_no_idx_q7
Test nb=4n arm_max_no_idx_q7:test_max_no_idx_q7 Test nb=16n arm_max_no_idx_q7:test_max_no_idx_q7
Test nb=4n+1 arm_max_no_idx_q7:test_max_no_idx_q7 Test nb=16n+1 arm_max_no_idx_q7:test_max_no_idx_q7
Test nb=3 arm_min_no_idx_q7:test_min_no_idx_q7 Test nb=15 arm_min_no_idx_q7:test_min_no_idx_q7
Test nb=4n arm_min_no_idx_q7:test_min_no_idx_q7 Test nb=16n arm_min_no_idx_q7:test_min_no_idx_q7
Test nb=4n+1 arm_min_no_idx_q7:test_min_no_idx_q7 Test nb=16n+1 arm_min_no_idx_q7:test_min_no_idx_q7
Test nb=2 arm_absmax_no_idx_q7:test_absmax_no_idx_q7 Test nb=15 arm_absmax_no_idx_q7:test_absmax_no_idx_q7
Test nb=2n arm_absmax_no_idx_q7:test_absmax_no_idx_q7 Test nb=16n arm_absmax_no_idx_q7:test_absmax_no_idx_q7
Test nb=2n+1 arm_absmax_no_idx_q7:test_absmax_no_idx_q7 Test nb=16n+1 arm_absmax_no_idx_q7:test_absmax_no_idx_q7
Test nb=2 arm_absmin_no_idx_q7:test_absmin_no_idx_q7 Test nb=15 arm_absmin_no_idx_q7:test_absmin_no_idx_q7
Test nb=2n arm_absmin_no_idx_q7:test_absmin_no_idx_q7 Test nb=16n arm_absmin_no_idx_q7:test_absmin_no_idx_q7
Test nb=2n+1 arm_absmin_no_idx_q7:test_absmin_no_idx_q7 Test nb=16n+1 arm_absmin_no_idx_q7:test_absmin_no_idx_q7
Test nb=15 arm_mse_q7:test_mse_q7 Test nb=15 arm_mse_q7:test_mse_q7
Test nb=16n arm_mse_q7:test_mse_q7 Test nb=16n arm_mse_q7:test_mse_q7

@ -15,6 +15,7 @@ group Root {
Pattern INPUT1_F16_ID : Input1_f16.txt Pattern INPUT1_F16_ID : Input1_f16.txt
Pattern INPUTNEW1_F16_ID : InputNew1_f16.txt Pattern INPUTNEW1_F16_ID : InputNew1_f16.txt
Pattern INPUTNEW2_F16_ID : InputNew2_f16.txt
Pattern INPUT2_F16_ID : Input2_f16.txt Pattern INPUT2_F16_ID : Input2_f16.txt
Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt Pattern MAXINDEXES_S16_ID : MaxIndexes1_s16.txt
@ -51,6 +52,8 @@ group Root {
Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt Pattern ABSMININDEXES_S16_ID : AbsMinIndexes27_s16.txt
Pattern ABSMINVALS_F16_ID : AbsMinVals27_f16.txt Pattern ABSMINVALS_F16_ID : AbsMinVals27_f16.txt
Pattern MSE_F16_ID : MSEVals28_f16.txt
Output OUT_F16_ID : Output Output OUT_F16_ID : Output
Output OUT_S16_ID : Index Output OUT_S16_ID : Index
Output TMP_F16_ID : Temp Output TMP_F16_ID : Temp
@ -100,25 +103,30 @@ group Root {
Test stability arm_std_f16:test_std_stability_f16 Test stability arm_std_f16:test_std_stability_f16
Test nb=3 arm_absmax_f16:test_absmax_f16 Test nb=7 arm_absmax_f16:test_absmax_f16
Test nb=4n arm_absmax_f16:test_absmax_f16 Test nb=8n arm_absmax_f16:test_absmax_f16
Test nb=4n+1 arm_absmax_f16:test_absmax_f16 Test nb=8n+1 arm_absmax_f16:test_absmax_f16
Test nb=3 arm_absmin_f16:test_absmin_f16 Test nb=7 arm_absmin_f16:test_absmin_f16
Test nb=4n arm_absmin_f16:test_absmin_f16 Test nb=8n arm_absmin_f16:test_absmin_f16
Test nb=4n+1 arm_absmin_f16:test_absmin_f16 Test nb=8n+1 arm_absmin_f16:test_absmin_f16
Test nb=7 arm_min_no_idx_f16:test_min_no_idx_f16 Test nb=7 arm_min_no_idx_f16:test_min_no_idx_f16
Test nb=8n arm_min_no_idx_f16:test_min_no_idx_f16 Test nb=8n arm_min_no_idx_f16:test_min_no_idx_f16
Test nb=8n+1 arm_min_no_idx_f16:test_min_no_idx_f16 Test nb=8n+1 arm_min_no_idx_f16:test_min_no_idx_f16
Test nb=2 arm_absmax_no_idx_f16:test_absmax_no_idx_f16 Test nb=7 arm_absmax_no_idx_f16:test_absmax_no_idx_f16
Test nb=2n arm_absmax_no_idx_f16:test_absmax_no_idx_f16 Test nb=8n arm_absmax_no_idx_f16:test_absmax_no_idx_f16
Test nb=2n+1 arm_absmax_no_idx_f16:test_absmax_no_idx_f16 Test nb=8n+1 arm_absmax_no_idx_f16:test_absmax_no_idx_f16
Test nb=7 arm_absmin_no_idx_f16:test_absmin_no_idx_f16
Test nb=8n arm_absmin_no_idx_f16:test_absmin_no_idx_f16
Test nb=8n+1 arm_absmin_no_idx_f16:test_absmin_no_idx_f16
Test nb=2 arm_absmin_no_idx_f16:test_absmin_no_idx_f16 Test nb=7 arm_mse_f16:test_mse_f16
Test nb=2n arm_absmin_no_idx_f16:test_absmin_no_idx_f16 Test nb=8n arm_mse_f16:test_mse_f16
Test nb=2n+1 arm_absmin_no_idx_f16:test_absmin_no_idx_f16 Test nb=8n+1 arm_mse_f16:test_mse_f16
Test long arm_mse_f16:test_mse_f16
} }
} }
} }

Loading…
Cancel
Save