CMSIS-DSP: Mean square error for q15, q31, f16, f32, f64.
Reworked q7 to have a bit more accuracy.pull/19/head
parent
47a987217f
commit
8dcdb350a4
@ -0,0 +1,203 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mse_f16.c
|
||||
* Description: Half floating point mean square error
|
||||
*
|
||||
* $Date: 05 April 2022
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/statistics_functions_f16.h"
|
||||
|
||||
/**
|
||||
@ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup MSE
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Mean square error between two half floating point vectors.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[in] blockSize number of samples in input vector
|
||||
@param[out] result mean square error
|
||||
@return none
|
||||
*/
|
||||
|
||||
#if !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16)
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_mse_f16(
|
||||
const float16_t * pSrcA,
|
||||
const float16_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
float16_t * result)
|
||||
|
||||
{
|
||||
float16x8_t vecA, vecB;
|
||||
float16x8_t vecSum;
|
||||
uint32_t blkCnt;
|
||||
_Float16 sum = 0.0f16;
|
||||
vecSum = vdupq_n_f16(0.0f16);
|
||||
|
||||
blkCnt = (blockSize) >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecA = vld1q(pSrcA);
|
||||
pSrcA += 8;
|
||||
|
||||
vecB = vld1q(pSrcB);
|
||||
pSrcB += 8;
|
||||
|
||||
vecA = vsubq(vecA, vecB);
|
||||
|
||||
vecSum = vfmaq(vecSum, vecA, vecA);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
|
||||
blkCnt = (blockSize) & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
|
||||
vecA = vsubq(vecA, vecB);
|
||||
vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
|
||||
}
|
||||
|
||||
sum = vecAddAcrossF16Mve(vecSum);
|
||||
|
||||
/* Store result in destination buffer */
|
||||
*result = (_Float16)sum / (_Float16)blockSize;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
|
||||
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
#if (!defined(ARM_MATH_MVE_FLOAT16)) || defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
|
||||
|
||||
void arm_mse_f16(
|
||||
const float16_t * pSrcA,
|
||||
const float16_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
float16_t * result)
|
||||
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
_Float16 inA, inB;
|
||||
_Float16 sum = 0.0f16; /* Temporary return variable */
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
blkCnt = (blockSize) >> 3;
|
||||
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = (_Float16)inA - (_Float16)inB;
|
||||
sum += (_Float16)inA * (_Float16)inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = (_Float16)inA - (_Float16)inB;
|
||||
sum += (_Float16)inA * (_Float16)inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = (_Float16)inA - (_Float16)inB;
|
||||
sum += (_Float16)inA * (_Float16)inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = (_Float16)inA - (_Float16)inB;
|
||||
sum += (_Float16)inA * (_Float16)inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = (_Float16)inA - (_Float16)inB;
|
||||
sum += (_Float16)inA * (_Float16)inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = (_Float16)inA - (_Float16)inB;
|
||||
sum += (_Float16)inA * (_Float16)inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = (_Float16)inA - (_Float16)inB;
|
||||
sum += (_Float16)inA * (_Float16)inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = (_Float16)inA - (_Float16)inB;
|
||||
sum += (_Float16)inA * (_Float16)inA;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = (blockSize) & 7;
|
||||
#else
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
#endif
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = (_Float16)inA - (_Float16)inB;
|
||||
sum += (_Float16)inA * (_Float16)inA;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store result in destination buffer */
|
||||
*result = (_Float16)sum / (_Float16)blockSize;
|
||||
}
|
||||
|
||||
#endif /* end of test for vector instruction availability */
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
/**
|
||||
@} end of MSE group
|
||||
*/
|
||||
@ -0,0 +1,246 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mse_f32.c
|
||||
* Description: Floating point mean square error
|
||||
*
|
||||
* $Date: 05 April 2022
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/statistics_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup MSE
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Mean square error between two floating point vectors.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[in] blockSize number of samples in input vector
|
||||
@param[out] result mean square error
|
||||
@return none
|
||||
*/
|
||||
|
||||
#if !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#if defined(ARM_MATH_MVEF)
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
void arm_mse_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
float32_t * result)
|
||||
|
||||
{
|
||||
float32x4_t vecA, vecB;
|
||||
float32x4_t vecSum;
|
||||
uint32_t blkCnt;
|
||||
float32_t sum = 0.0f;
|
||||
vecSum = vdupq_n_f32(0.0f);
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = (blockSize) >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecA = vld1q(pSrcA);
|
||||
pSrcA += 4;
|
||||
|
||||
vecB = vld1q(pSrcB);
|
||||
pSrcB += 4;
|
||||
|
||||
vecA = vsubq(vecA, vecB);
|
||||
|
||||
vecSum = vfmaq(vecSum, vecA, vecA);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
|
||||
blkCnt = (blockSize) & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecA = vld1q(pSrcA);
|
||||
vecB = vld1q(pSrcB);
|
||||
|
||||
vecA = vsubq(vecA, vecB);
|
||||
vecSum = vfmaq_m(vecSum, vecA, vecA, p0);
|
||||
}
|
||||
|
||||
sum = vecAddAcrossF32Mve(vecSum);
|
||||
|
||||
/* Store result in destination buffer */
|
||||
*result = sum / blockSize;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(ARM_MATH_NEON)
|
||||
void arm_mse_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
float32_t * result)
|
||||
|
||||
{
|
||||
float32x4_t vecA, vecB;
|
||||
float32x4_t vecSum;
|
||||
uint32_t blkCnt;
|
||||
float32_t sum = 0.0f;
|
||||
vecSum = vdupq_n_f32(0.0f);
|
||||
#if !defined(__aarch64__)
|
||||
f32x2_t tmp = vdup_n_f32(0.0f);
|
||||
#endif
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = (blockSize) >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecA = vld1q_f32(pSrcA);
|
||||
pSrcA += 4;
|
||||
|
||||
vecB = vld1q_f32(pSrcB);
|
||||
pSrcB += 4;
|
||||
|
||||
vecA = vsubq_f32(vecA, vecB);
|
||||
|
||||
vecSum = vfmaq_f32(vecSum, vecA, vecA);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
#if defined(__aarch64__)
|
||||
sum = vpadds_f32(vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum)));
|
||||
#else
|
||||
tmp = vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum));
|
||||
sum = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1);
|
||||
|
||||
#endif
|
||||
|
||||
blkCnt = (blockSize) & 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* Calculate dot product and store result in a temporary buffer. */
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = inA - inB;
|
||||
sum += inA * inA;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store result in destination buffer */
|
||||
*result = sum / blockSize;
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/
|
||||
|
||||
|
||||
|
||||
#if (!defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON)) || defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
|
||||
void arm_mse_f32(
|
||||
const float32_t * pSrcA,
|
||||
const float32_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
float32_t * result)
|
||||
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
float32_t inA, inB;
|
||||
float32_t sum = 0.0f; /* Temporary return variable */
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = (blockSize) >> 2;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = inA - inB;
|
||||
sum += inA * inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = inA - inB;
|
||||
sum += inA * inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = inA - inB;
|
||||
sum += inA * inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = inA - inB;
|
||||
sum += inA * inA;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = (blockSize) & 3;
|
||||
#else
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
#endif
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = inA - inB;
|
||||
sum += inA * inA;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store result in destination buffer */
|
||||
*result = sum / blockSize;
|
||||
}
|
||||
|
||||
#endif /* end of test for vector instruction availability */
|
||||
|
||||
/**
|
||||
@} end of MSE group
|
||||
*/
|
||||
@ -0,0 +1,110 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mse_f64.c
|
||||
* Description: Double floating point mean square error
|
||||
*
|
||||
* $Date: 05 April 2022
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/statistics_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupStats
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup MSE
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Mean square error between two double floating point vectors.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[in] blockSize number of samples in input vector
|
||||
@param[out] result mean square error
|
||||
@return none
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void arm_mse_f64(
|
||||
const float64_t * pSrcA,
|
||||
const float64_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
float64_t * result)
|
||||
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
float64_t inA, inB;
|
||||
float64_t sum = 0.0; /* Temporary return variable */
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
blkCnt = (blockSize) >> 1;
|
||||
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = inA - inB;
|
||||
sum += inA * inA;
|
||||
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = inA - inB;
|
||||
sum += inA * inA;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = (blockSize) & 1;
|
||||
#else
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
#endif
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
inA = *pSrcA++;
|
||||
inB = *pSrcB++;
|
||||
inA = inA - inB;
|
||||
sum += inA * inA;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store result in destination buffer */
|
||||
*result = sum / blockSize;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@} end of MSE group
|
||||
*/
|
||||
@ -0,0 +1,175 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mse_q15.c
|
||||
* Description: Mean square error between two Q15 vectors
|
||||
*
|
||||
* $Date: 04 April 2022
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/statistics_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupStats
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup MSE
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Mean square error between two Q15 vectors.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[in] blockSize number of samples in input vector
|
||||
@param[out] pResult mean square error
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_mse_q15(
|
||||
const q15_t * pSrcA,
|
||||
const q15_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q15_t * pResult)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecSrcA,vecSrcB;
|
||||
q63_t sum = 0LL;
|
||||
|
||||
blkCnt = blockSize >> 3U;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecSrcA = vld1q(pSrcA);
|
||||
vecSrcB = vld1q(pSrcB);
|
||||
|
||||
vecSrcA = vshrq(vecSrcA,1);
|
||||
vecSrcB = vshrq(vecSrcB,1);
|
||||
|
||||
vecSrcA = vqsubq(vecSrcA,vecSrcB);
|
||||
/*
|
||||
* sum lanes
|
||||
*/
|
||||
sum = vmlaldavaq(sum, vecSrcA, vecSrcA);
|
||||
|
||||
blkCnt--;
|
||||
pSrcA += 8;
|
||||
pSrcB += 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecSrcA = vld1q(pSrcA);
|
||||
vecSrcB = vld1q(pSrcB);
|
||||
|
||||
vecSrcA = vshrq(vecSrcA,1);
|
||||
vecSrcB = vshrq(vecSrcB,1);
|
||||
|
||||
vecSrcA = vqsubq(vecSrcA,vecSrcB);
|
||||
|
||||
sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
*pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
|
||||
}
|
||||
#else
|
||||
void arm_mse_q15(
|
||||
const q15_t * pSrcA,
|
||||
const q15_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q15_t * pResult)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q63_t sum = 0; /* Temporary result storage */
|
||||
q15_t inA,inB; /* Temporary variable to store input value */
|
||||
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
inA = *pSrcA++ >> 1;
|
||||
inB = *pSrcB++ >> 1;
|
||||
inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
|
||||
sum += (q63_t)((q31_t) inA * inA);
|
||||
|
||||
inA = *pSrcA++ >> 1;
|
||||
inB = *pSrcB++ >> 1;
|
||||
inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
|
||||
sum += (q63_t)((q31_t) inA * inA);
|
||||
|
||||
inA = *pSrcA++ >> 1;
|
||||
inB = *pSrcB++ >> 1;
|
||||
inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
|
||||
sum += (q63_t)((q31_t) inA * inA);
|
||||
|
||||
inA = *pSrcA++ >> 1;
|
||||
inB = *pSrcB++ >> 1;
|
||||
inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
|
||||
sum += (q63_t)((q31_t) inA * inA);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
inA = *pSrcA++ >> 1;
|
||||
inB = *pSrcB++ >> 1;
|
||||
inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
|
||||
sum += (q63_t)((q31_t) inA * inA);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store result in q15 format */
|
||||
*pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of MSE group
|
||||
*/
|
||||
@ -0,0 +1,176 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mse_q31.c
|
||||
* Description: Mean square error between two Q31 vectors
|
||||
*
|
||||
* $Date: 04 April 2022
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/statistics_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupStats
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup MSE
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Mean square error between two Q31 vectors.
|
||||
@param[in] pSrcA points to the first input vector
|
||||
@param[in] pSrcB points to the second input vector
|
||||
@param[in] blockSize number of samples in input vector
|
||||
@param[out] pResult mean square error
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_mse_q31(
|
||||
const q31_t * pSrcA,
|
||||
const q31_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q31_t * pResult)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31x4_t vecSrcA,vecSrcB;
|
||||
q63_t sum = 0LL;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecSrcA = vld1q(pSrcA);
|
||||
vecSrcB = vld1q(pSrcB);
|
||||
|
||||
vecSrcA = vshrq(vecSrcA,1);
|
||||
vecSrcB = vshrq(vecSrcB,1);
|
||||
|
||||
|
||||
vecSrcA = vqsubq(vecSrcA,vecSrcB);
|
||||
/*
|
||||
* sum lanes
|
||||
*/
|
||||
sum = vrmlaldavhaq(sum, vecSrcA, vecSrcA);
|
||||
|
||||
blkCnt--;
|
||||
pSrcA += 4;
|
||||
pSrcB += 4;
|
||||
}
|
||||
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
if (blkCnt > 0U)
|
||||
{
|
||||
mve_pred16_t p0 = vctp32q(blkCnt);
|
||||
vecSrcA = vld1q(pSrcA);
|
||||
vecSrcB = vld1q(pSrcB);
|
||||
|
||||
vecSrcA = vshrq(vecSrcA,1);
|
||||
vecSrcB = vshrq(vecSrcB,1);
|
||||
|
||||
vecSrcA = vqsubq(vecSrcA,vecSrcB);
|
||||
|
||||
sum = vrmlaldavhaq_p(sum, vecSrcA, vecSrcA, p0);
|
||||
}
|
||||
|
||||
|
||||
*pResult = (q31_t) ((sum / blockSize)>>21);
|
||||
|
||||
}
|
||||
#else
|
||||
void arm_mse_q31(
|
||||
const q31_t * pSrcA,
|
||||
const q31_t * pSrcB,
|
||||
uint32_t blockSize,
|
||||
q31_t * pResult)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
q63_t sum = 0; /* Temporary result storage */
|
||||
|
||||
q31_t inA32,inB32; /* Temporary variable to store packed input value */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
inA32 = *pSrcA++ >> 1;
|
||||
inB32 = *pSrcB++ >> 1;
|
||||
inA32 = __QSUB(inA32, inB32);
|
||||
sum += ((q63_t) inA32 * inA32) >> 14U;
|
||||
|
||||
inA32 = *pSrcA++ >> 1;
|
||||
inB32 = *pSrcB++ >> 1;
|
||||
inA32 = __QSUB(inA32, inB32);
|
||||
sum += ((q63_t) inA32 * inA32) >> 14U;
|
||||
|
||||
inA32 = *pSrcA++ >> 1;
|
||||
inB32 = *pSrcB++ >> 1;
|
||||
inA32 = __QSUB(inA32, inB32);
|
||||
sum += ((q63_t) inA32 * inA32) >> 14U;
|
||||
|
||||
inA32 = *pSrcA++ >> 1;
|
||||
inB32 = *pSrcB++ >> 1;
|
||||
inA32 = __QSUB(inA32, inB32);
|
||||
sum += ((q63_t) inA32 * inA32) >> 14U;
|
||||
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
inA32 = *pSrcA++ >> 1;
|
||||
inB32 = *pSrcB++ >> 1;
|
||||
inA32 = __QSUB(inA32, inB32);
|
||||
sum += ((q63_t) inA32 * inA32) >> 14U;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Store result in q31 format */
|
||||
*pResult = (q31_t) ((sum / blockSize)>>15);
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of MSE group
|
||||
*/
|
||||
@ -1,8 +1,8 @@
|
||||
H
|
||||
3
|
||||
// 4
|
||||
0x0004
|
||||
// 4
|
||||
0x0004
|
||||
// 4
|
||||
0x0004
|
||||
// 6
|
||||
0x0006
|
||||
// 6
|
||||
0x0006
|
||||
// 18
|
||||
0x0012
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
H
|
||||
3
|
||||
// 0.423138
|
||||
0x36c5
|
||||
// 0.423138
|
||||
0x36c5
|
||||
// 0.423138
|
||||
0x36c5
|
||||
// 0.640755
|
||||
0x3920
|
||||
// 0.640755
|
||||
0x3920
|
||||
// 0.887109
|
||||
0x3b19
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
H
|
||||
3
|
||||
// 0.027578
|
||||
0x270f
|
||||
// 0.007974
|
||||
0x2015
|
||||
// 0.007974
|
||||
0x2015
|
||||
// 0.107198
|
||||
0x2edc
|
||||
// 0.021092
|
||||
0x2566
|
||||
// 0.002011
|
||||
0x181e
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,10 @@
|
||||
H
|
||||
4
|
||||
// 0.038705
|
||||
0x28f4
|
||||
// 0.092517
|
||||
0x2dec
|
||||
// 0.106867
|
||||
0x2ed7
|
||||
// 0.225679
|
||||
0x3339
|
||||
// 0.211855
|
||||
0x32c8
|
||||
// 0.182973
|
||||
0x31db
|
||||
// 0.268630
|
||||
0x344c
|
||||
// 0.234421
|
||||
0x3380
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
H
|
||||
3
|
||||
// 1
|
||||
0x0001
|
||||
// 1
|
||||
0x0001
|
||||
// 1
|
||||
0x0001
|
||||
// 0
|
||||
0x0000
|
||||
// 7
|
||||
0x0007
|
||||
// 7
|
||||
0x0007
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
W
|
||||
3
|
||||
// 0.476185
|
||||
0x3ef3ce78
|
||||
// 0.476185
|
||||
0x3ef3ce78
|
||||
// 0.476185
|
||||
0x3ef3ce78
|
||||
// 0.725166
|
||||
0x3f39a47a
|
||||
// 0.817687
|
||||
0x3f5153ed
|
||||
// 0.817687
|
||||
0x3f5153ed
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
H
|
||||
3
|
||||
// 0
|
||||
0x0000
|
||||
// 7
|
||||
0x0007
|
||||
// 7
|
||||
0x0007
|
||||
// 1
|
||||
0x0001
|
||||
// 5
|
||||
0x0005
|
||||
// 9
|
||||
0x0009
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
W
|
||||
3
|
||||
// 0.184919
|
||||
0x3e3d5b69
|
||||
// 0.008792
|
||||
0x3c100d1c
|
||||
// 0.008792
|
||||
0x3c100d1c
|
||||
// 0.198876
|
||||
0x3e4ba63c
|
||||
// 0.035481
|
||||
0x3d1154a3
|
||||
// 0.034200
|
||||
0x3d0c1510
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,10 @@
|
||||
W
|
||||
4
|
||||
// 0.125231
|
||||
0x3e003c73
|
||||
// 0.122919
|
||||
0x3dfbbceb
|
||||
// 0.145740
|
||||
0x3e153cd2
|
||||
// 0.189820
|
||||
0x3e426031
|
||||
// 0.072747
|
||||
0x3d94fc3e
|
||||
// 0.176808
|
||||
0x3e350d0d
|
||||
// 0.207669
|
||||
0x3e54a726
|
||||
// 0.183645
|
||||
0x3e3c0d87
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
D
|
||||
3
|
||||
// 0.203055
|
||||
0x3fc9fdb6e0c81ee0
|
||||
// 0.360222
|
||||
0x3fd70de0df777efb
|
||||
// 0.360222
|
||||
0x3fd70de0df777efb
|
||||
// 0.579795
|
||||
0x3fe28dad67519d3d
|
||||
// 0.783610
|
||||
0x3fe91356237f16f6
|
||||
// 0.783610
|
||||
0x3fe91356237f16f6
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
D
|
||||
3
|
||||
// 0.003692
|
||||
0x3f6e3f80ef9e8a83
|
||||
// 0.003692
|
||||
0x3f6e3f80ef9e8a83
|
||||
// 0.003692
|
||||
0x3f6e3f80ef9e8a83
|
||||
// 0.310923
|
||||
0x3fd3e6286ed8195c
|
||||
// 0.310923
|
||||
0x3fd3e6286ed8195c
|
||||
// 0.150640
|
||||
0x3fc34828d25e0053
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,10 @@
|
||||
D
|
||||
4
|
||||
// 0.001072
|
||||
0x3f518f8a7ed015a2
|
||||
// 0.073015
|
||||
0x3fb2b11b5caa023a
|
||||
// 0.060567
|
||||
0x3faf02a5beb935ad
|
||||
// 0.198414
|
||||
0x3fc9659ffa60ff3b
|
||||
// 0.221944
|
||||
0x3fcc68ab519cbb08
|
||||
// 0.487606
|
||||
0x3fdf34ef9e2840ea
|
||||
// 0.411797
|
||||
0x3fda5ae1181a5066
|
||||
// 0.186577
|
||||
0x3fc7e1bdbcffc958
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
H
|
||||
3
|
||||
// 4
|
||||
0x0004
|
||||
// 4
|
||||
0x0004
|
||||
// 18
|
||||
0x0012
|
||||
// 5
|
||||
0x0005
|
||||
// 15
|
||||
0x000F
|
||||
// 15
|
||||
0x000F
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
H
|
||||
3
|
||||
// 0.540886
|
||||
0x453C
|
||||
// 0.540886
|
||||
0x453C
|
||||
// 0.701466
|
||||
0x59CA
|
||||
// 0.511444
|
||||
0x4177
|
||||
// 0.572485
|
||||
0x4947
|
||||
// 0.572485
|
||||
0x4947
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
H
|
||||
3
|
||||
// 6
|
||||
0x0006
|
||||
// 6
|
||||
0x0006
|
||||
// 6
|
||||
0x0006
|
||||
// 1
|
||||
0x0001
|
||||
// 1
|
||||
0x0001
|
||||
// 1
|
||||
0x0001
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
H
|
||||
3
|
||||
// 0.003012
|
||||
0x0063
|
||||
// 0.003012
|
||||
0x0063
|
||||
// 0.003012
|
||||
0x0063
|
||||
// 0.065882
|
||||
0x086F
|
||||
// 0.065882
|
||||
0x086F
|
||||
// 0.065882
|
||||
0x086F
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,10 @@
|
||||
H
|
||||
4
|
||||
// 0.291384
|
||||
0x254C
|
||||
// 0.326840
|
||||
0x29D6
|
||||
// 0.266990
|
||||
0x222D
|
||||
// 0.278624
|
||||
0x23AA
|
||||
// 0.123046
|
||||
0x0FC0
|
||||
// 0.134261
|
||||
0x112F
|
||||
// 0.135165
|
||||
0x114D
|
||||
// 0.237464
|
||||
0x1E65
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
H
|
||||
3
|
||||
// 1
|
||||
0x0001
|
||||
// 3
|
||||
0x0003
|
||||
// 8
|
||||
0x0008
|
||||
// 2
|
||||
0x0002
|
||||
// 7
|
||||
0x0007
|
||||
// 7
|
||||
0x0007
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
W
|
||||
3
|
||||
// 0.352374
|
||||
0x2D1A96B5
|
||||
// 0.530170
|
||||
0x43DC9BE7
|
||||
// 0.634745
|
||||
0x513F5458
|
||||
// 0.254671
|
||||
0x20990B68
|
||||
// 0.516980
|
||||
0x422C699D
|
||||
// 0.516980
|
||||
0x422C699D
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
W
|
||||
3
|
||||
// 0.132805
|
||||
0x10FFBE95
|
||||
// 0.003898
|
||||
0x007FB95F
|
||||
// 0.003898
|
||||
0x007FB95F
|
||||
// 0.053227
|
||||
0x06D0231F
|
||||
// 0.003305
|
||||
0x006C4DD3
|
||||
// 0.003305
|
||||
0x006C4DD3
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,10 @@
|
||||
W
|
||||
4
|
||||
// 0.066580
|
||||
0x0885AD96
|
||||
// 0.089078
|
||||
0x0B66E9B3
|
||||
// 0.168307
|
||||
0x158B15E2
|
||||
// 0.196400
|
||||
0x19239FC7
|
||||
// 0.153783
|
||||
0x13AF2B40
|
||||
// 0.209919
|
||||
0x1ADE9F11
|
||||
// 0.155268
|
||||
0x13DFD01C
|
||||
// 0.248101
|
||||
0x1FC1C512
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
H
|
||||
4
|
||||
// 7
|
||||
0x0007
|
||||
// 7
|
||||
0x0007
|
||||
// 46
|
||||
0x002E
|
||||
// 1
|
||||
0x0001
|
||||
// 25
|
||||
0x0019
|
||||
// 25
|
||||
0x0019
|
||||
// 279
|
||||
0x0117
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
B
|
||||
4
|
||||
// 0.807620
|
||||
// 0.619484
|
||||
0x4F
|
||||
// 0.802797
|
||||
0x67
|
||||
// 0.807620
|
||||
// 0.802797
|
||||
0x67
|
||||
// 0.984827
|
||||
0x7E
|
||||
// 0.900000
|
||||
0x73
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
H
|
||||
4
|
||||
// 13
|
||||
0x000D
|
||||
// 13
|
||||
0x000D
|
||||
// 13
|
||||
0x000D
|
||||
// 8
|
||||
0x0008
|
||||
// 18
|
||||
0x0012
|
||||
// 18
|
||||
0x0012
|
||||
// 279
|
||||
0x0117
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
B
|
||||
4
|
||||
// 0.008109
|
||||
0x01
|
||||
// 0.008109
|
||||
0x01
|
||||
// 0.008109
|
||||
// 0.008779
|
||||
0x01
|
||||
// 0.000193
|
||||
0x00
|
||||
// 0.000193
|
||||
0x00
|
||||
// 0.000000
|
||||
0x00
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,10 @@
|
||||
B
|
||||
4
|
||||
// 0.191272
|
||||
0x18
|
||||
// 0.159547
|
||||
0x14
|
||||
// 0.205092
|
||||
0x1A
|
||||
// 0.257902
|
||||
0x21
|
||||
// 0.092336
|
||||
0x0C
|
||||
// 0.121537
|
||||
0x10
|
||||
// 0.162974
|
||||
0x15
|
||||
// 0.148534
|
||||
0x13
|
||||
|
||||
Loading…
Reference in New Issue