CMSIS-DSP: Added complex math f16

pull/19/head
Christophe Favergeon 6 years ago
parent 8b465544a1
commit ebf9104c4e

@ -80,6 +80,8 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
/* newton initial guess */
#define INVSQRT_MAGIC_F32 0x5f3759df
#define INV_NEWTON_INIT_F32 0x7EF127EA
#define INVSQRT_NEWTON_MVE_F32(invSqrt, xHalf, xStart)\
{ \
@ -95,6 +97,74 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
}
#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) */
/***************************************
Definitions available for f16 datatype with HW acceleration only
***************************************/
#if defined (ARM_MATH_MVE_FLOAT16)
/*
 * Reduce a vector holding four interleaved complex f16 values
 * (re0, im0, re1, im1, re2, im2, re3, im3) to their complex sum.
 *
 * The whole vector is returned: the real part of the sum is in f16 lane 4
 * and the imaginary part in f16 lane 5. The remaining lanes only hold
 * partial sums and must be treated as don't-care by the caller.
 */
__STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16(
    float16x8_t vecIn)
{
    float16x8_t vecTmp, vecOut;
    /*
     * Carry word handed to vshlcq_s32. It is shifted into the bottom of the
     * vector, so give it a defined value instead of reading an uninitialised
     * stack slot (the lanes it lands in are don't-care either way).
     */
    uint32_t    tmp = 0U;

    /* swap the two 32-bit (complex) elements inside each 64-bit half */
    vecTmp = (float16x8_t) vrev64q_s32((int32x4_t) vecIn);
    // TO TRACK : using canonical addition leads to inefficient code generation for f16
    // vecTmp = vecTmp + vecAccCpx0;
    /*
     * Compute
     *  re0+re1 | im0+im1 | re0+re1 | im0+im1
     *  re2+re3 | im2+im3 | re2+re3 | im2+im3
     */
    vecTmp = vaddq(vecTmp, vecIn);
    vecOut = vecTmp;
    /*
     * shift the whole vector left by 32 bits, inserting tmp in the bottom
     */
    vecOut = vreinterpretq_f16_s32(vshlcq_s32(vreinterpretq_s32_f16(vecOut), &tmp, 32));
    /*
     * Compute:
     *  DONTCARE        | DONTCARE        | re0+re1+re0+re1 | im0+im1+im0+im1
     *  re0+re1+re2+re3 | im0+im1+im2+im3 | re2+re3+re2+re3 | im2+im3+im2+im3
     */
    vecOut = vaddq(vecOut, vecTmp);
    /*
     * Cmplx sum is in the f16 elements at index 4 (real) and 5 (imag);
     * return the full vector
     */
    return vecOut;
}
/*
 * Sum the complex f16 values held in vec and assign the real part of the sum
 * to Re and the imaginary part to Im (lanes 4 and 5 of the vector returned by
 * __mve_cmplx_sum_intra_vec_f16).
 *
 * Expands to a braced block that declares a local named vecOut: use it as a
 * statement, and do not pass an expression that itself refers to a variable
 * called vecOut.
 */
#define mve_cmplx_sum_intra_r_i_f16(vec, Re, Im) \
{ \
float16x8_t vecOut = __mve_cmplx_sum_intra_vec_f16(vec); \
Re = vgetq_lane(vecOut, 4); \
Im = vgetq_lane(vecOut, 5); \
}
/*
 * Bit-pattern constant used to build the initial guess of the f16 inverse
 * square root: callers compute INVSQRT_MAGIC_F16 - (bits(x) >> 1).
 * Its low 10 bits (0x1ba) are the top bits of the f32 magic mantissa 0x3759df.
 */
#define INVSQRT_MAGIC_F16 0x59ba /* ( 0x1ba = 0x3759df >> 13) */
/* NOTE(review): no user of INV_NEWTON_INIT_F16 is visible in this chunk; presumably the f16 counterpart of INV_NEWTON_INIT_F32 - confirm */
#define INV_NEWTON_INIT_F16 0x7773
/*
 * One Newton-Raphson refinement step of an inverse square root estimate on a
 * float16x8_t vector:
 *
 *     invSqrt = xStart * (1.5 - xHalf * xStart * xStart)
 *
 *   invSqrt : lvalue receiving the refined estimate
 *   xHalf   : vector holding half of the value whose inverse root is sought
 *   xStart  : current estimate; expanded three times, so pass a plain
 *             variable or a side-effect-free expression
 *
 * Expands to a braced block declaring a local named tmp.
 */
/* canonical version of INVSQRT_NEWTON_MVE_F16 leads to bad performance */
#define INVSQRT_NEWTON_MVE_F16(invSqrt, xHalf, xStart) \
{ \
float16x8_t tmp; \
\
/* tmp = xhalf * x * x */ \
tmp = vmulq(xStart, xStart); \
tmp = vmulq(tmp, xHalf); \
/* (1.5f - xhalf * x * x) */ \
tmp = vsubq(vdupq_n_f16((float16_t)1.5), tmp); \
/* x = x*(1.5f-xhalf*x*x); */ \
invSqrt = vmulq(tmp, xStart); \
}
#endif
/***************************************
Definitions available for MVEI only

@ -36,6 +36,7 @@ extern "C"
#include "dsp/none.h"
#include "dsp/utils.h"
#include "dsp/fast_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)

@ -26,12 +26,94 @@
#ifndef _COMPLEX_MATH_FUNCTIONS_F16_H_
#define _COMPLEX_MATH_FUNCTIONS_F16_H_
#include "arm_math_types_f16.h"
#include "arm_math_memory.h"
#include "dsp/none.h"
#include "dsp/utils.h"
#include "dsp/fast_math_functions_f16.h"
#ifdef __cplusplus
extern "C"
{
#endif
#if defined(ARM_FLOAT16_SUPPORTED)
/**
 * @brief Floating-point (f16) complex conjugate.
 *
 * Copies each real part unchanged and negates each imaginary part.
 *
 * @param[in] pSrc points to the input vector, interleaved (real, imag, ...), 2*numSamples values
 * @param[out] pDst points to the output vector, same layout and size as the input
 * @param[in] numSamples number of complex samples in each vector
 */
void arm_cmplx_conj_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t numSamples);
/**
 * @brief Floating-point (f16) complex magnitude squared.
 *
 * Computes real^2 + imag^2 for every complex sample.
 *
 * @param[in] pSrc points to the complex input vector, interleaved (real, imag, ...), 2*numSamples values
 * @param[out] pDst points to the real output vector, numSamples values
 * @param[in] numSamples number of complex samples in the input vector
 */
void arm_cmplx_mag_squared_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t numSamples);
/**
 * @brief Floating-point (f16) complex magnitude.
 *
 * Computes sqrt(real^2 + imag^2) for every complex sample.
 *
 * @param[in] pSrc points to the complex input vector, interleaved (real, imag, ...), 2*numSamples values
 * @param[out] pDst points to the real output vector, numSamples values
 * @param[in] numSamples number of complex samples in the input vector
 */
void arm_cmplx_mag_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t numSamples);
/**
 * @brief Floating-point (f16) complex dot product.
 *
 * Multiplies the two complex vectors element by element and sums the
 * products; the accumulation is carried out in float16_t.
 *
 * @param[in] pSrcA points to the first input vector, interleaved (real, imag, ...), 2*numSamples values
 * @param[in] pSrcB points to the second input vector, same layout and size
 * @param[in] numSamples number of complex samples in each vector
 * @param[out] realResult real part of the result returned here
 * @param[out] imagResult imaginary part of the result returned here
 */
void arm_cmplx_dot_prod_f16(
const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t numSamples,
float16_t * realResult,
float16_t * imagResult);
/**
 * @brief Floating-point (f16) complex-by-real multiplication.
 * @param[in] pSrcCmplx points to the complex input vector, interleaved (real, imag, ...)
 * @param[in] pSrcReal points to the real input vector (presumably numSamples values - confirm against the implementation)
 * @param[out] pCmplxDst points to the complex output vector, interleaved (real, imag, ...)
 * @param[in] numSamples number of complex samples processed
 */
void arm_cmplx_mult_real_f16(
const float16_t * pSrcCmplx,
const float16_t * pSrcReal,
float16_t * pCmplxDst,
uint32_t numSamples);
/**
 * @brief Floating-point (f16) complex-by-complex multiplication.
 *
 * For every sample: (a + jb) * (c + jd) = (ac - bd) + j(ad + bc).
 *
 * @param[in] pSrcA points to the first input vector, interleaved (real, imag, ...), 2*numSamples values
 * @param[in] pSrcB points to the second input vector, same layout and size
 * @param[out] pDst points to the output vector, same layout and size
 * @param[in] numSamples number of complex samples in each vector
 */
void arm_cmplx_mult_cmplx_f16(
const float16_t * pSrcA,
const float16_t * pSrcB,
float16_t * pDst,
uint32_t numSamples);
#endif /*defined(ARM_FLOAT16_SUPPORTED)*/
#ifdef __cplusplus
}

@ -26,12 +26,46 @@
#ifndef _FAST_MATH_FUNCTIONS_F16_H_
#define _FAST_MATH_FUNCTIONS_F16_H_
#include "arm_math_types_f16.h"
#include "arm_math_memory.h"
#include "dsp/none.h"
#include "dsp/utils.h"
#include "dsp/fast_math_functions.h"
#ifdef __cplusplus
extern "C"
{
#endif
#if defined(ARM_FLOAT16_SUPPORTED)
/**
* @addtogroup SQRT
* @{
*/
/**
  @brief         Half-precision floating-point square root.

                 The input is widened to float32_t, the root is taken with
                 arm_sqrt_f32() and the result is narrowed back to float16_t.

  @param[in]     in    input value
  @param[out]    pOut  square root of input value
  @return        execution status, forwarded unchanged from arm_sqrt_f32()
                   - \ref ARM_MATH_SUCCESS        : input value is positive
                   - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
 */
__STATIC_FORCEINLINE arm_status arm_sqrt_f16(
    float16_t   in,
    float16_t * pOut)
{
    float32_t        root32;                                  /* single-precision result */
    const arm_status outcome = arm_sqrt_f32((float32_t) in, &root32);

    /* narrow the result; written whatever the status is */
    *pOut = (float16_t) root32;

    return outcome;
}
#endif /*defined(ARM_FLOAT16_SUPPORTED)*/
#ifdef __cplusplus
}

@ -5,8 +5,6 @@ project(CMSISDSPComplexMath)
include(configLib)
include(configDsp)
file(GLOB SRC "./*_*.c")
add_library(CMSISDSPComplexMath STATIC)
configLib(CMSISDSPComplexMath ${ROOT})
@ -56,6 +54,14 @@ target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_f32.c)
target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_q15.c)
target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_q31.c)
# Half-precision (f16) kernels: only added when neither ARMAC5 nor
# DISABLEFLOAT16 is set.
if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
target_sources(CMSISDSPComplexMath PRIVATE
  arm_cmplx_conj_f16.c
  arm_cmplx_dot_prod_f16.c
  arm_cmplx_mag_f16.c
  arm_cmplx_mag_squared_f16.c
  arm_cmplx_mult_cmplx_f16.c
  arm_cmplx_mult_real_f16.c)
endif()
### Includes
target_include_directories(CMSISDSPComplexMath PUBLIC "${DSP}/Include")

@ -0,0 +1,32 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: ComplexMathFunctionsF16.c
* Description: Combination of all complex math function f16 source files.
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_cmplx_conj_f16.c"
#include "arm_cmplx_dot_prod_f16.c"
#include "arm_cmplx_mag_f16.c"
#include "arm_cmplx_mag_squared_f16.c"
#include "arm_cmplx_mult_cmplx_f16.c"
#include "arm_cmplx_mult_real_f16.c"

@ -0,0 +1,183 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_conj_f16.c
* Description: Floating-point complex conjugate
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_conj Complex Conjugate
Conjugates the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to the destination data where the result should be written.
<code>numSamples</code> specifies the number of complex samples
and the data in each array is stored in an interleaved fashion
(real, imag, real, imag, ...).
Each array has a total of <code>2*numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[(2*n) ] = pSrc[(2*n) ]; // real part
pDst[(2*n)+1] = -pSrc[(2*n)+1]; // imag part
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_conj
@{
*/
/**
@brief Floating-point complex conjugate.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] numSamples number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation of the f16 complex conjugate.
 *
 * pSrc / pDst hold numSamples interleaved (real, imag) pairs. Full vectors of
 * 8 f16 values (4 complex samples) are multiplied by the sign pattern
 * {+1, -1, ...}; the remaining 0..3 complex samples are handled by a scalar
 * loop.
 */
void arm_cmplx_conj_f16(
    const float16_t * pSrc,
          float16_t * pDst,
          uint32_t    numSamples)
{
    static const float16_t cmplx_conj_sign[8] = { 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f };
    uint32_t blockSize = numSamples * CMPLX_DIM;   /* total number of f16 values */
    uint32_t blkCnt;                               /* loop counter */
    f16x8_t  vecSrc;
    f16x8_t  vecSign;

    /*
     * Load the sign vector with a regular f16 vector load. Dereferencing the
     * table through a cast to f16x8_t * would drop the const qualifier and
     * assume the table has the alignment of the vector type, which a
     * float16_t array does not guarantee.
     */
    vecSign = vld1q_f16(cmplx_conj_sign);

    /* Compute 4 complex samples (8 f16 values) at a time */
    blkCnt = blockSize >> 3U;
    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vmulq(vecSrc, vecSign));
        /*
         * Decrement the blkCnt loop counter
         * Advance vector source and destination pointers
         */
        pSrc += 8;
        pDst += 8;
        blkCnt--;
    }

    /* Tail: blockSize & 7 f16 values are left, i.e. half as many complex samples */
    blkCnt = (blockSize & 0x7) >> 1;
    while (blkCnt > 0U)
    {
        /* C[0] + jC[1] = A[0]+ j(-1)A[1] */
        /* Calculate Complex Conjugate and store result in destination buffer. */
        *pDst++ =  *pSrc++;
        *pDst++ = -*pSrc++;
        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
/*
 * Portable C implementation of the f16 complex conjugate.
 *
 * Walks the interleaved (real, imag) pairs of pSrc, copying every real part
 * and negating every imaginary part into pDst.
 */
void arm_cmplx_conj_f16(
    const float16_t * pSrc,
          float16_t * pDst,
          uint32_t    numSamples)
{
    uint32_t remaining;                            /* complex samples still to process */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t groups;                               /* groups of four complex samples */

    /* Unrolled section: four complex samples (eight values) per pass */
    for (groups = numSamples >> 2U; groups > 0U; groups--)
    {
        pDst[0] =  pSrc[0];
        pDst[1] = -pSrc[1];
        pDst[2] =  pSrc[2];
        pDst[3] = -pSrc[3];
        pDst[4] =  pSrc[4];
        pDst[5] = -pSrc[5];
        pDst[6] =  pSrc[6];
        pDst[7] = -pSrc[7];

        pSrc += 8;
        pDst += 8;
    }

    /* Samples that did not fill a group of four */
    remaining = numSamples & 0x3U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* real part is kept, imaginary part changes sign */
        pDst[0] =  pSrc[0];
        pDst[1] = -pSrc[1];

        pSrc += 2;
        pDst += 2;
    }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of cmplx_conj group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,236 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_dot_prod_f16.c
* Description: Floating-point complex dot product
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_dot_prod Complex Dot Product
Computes the dot product of two complex vectors.
The vectors are multiplied element-by-element and then summed.
The <code>pSrcA</code> points to the first complex input vector and
<code>pSrcB</code> points to the second complex input vector.
<code>numSamples</code> specifies the number of complex samples
and the data in each array is stored in an interleaved fashion
(real, imag, real, imag, ...).
Each array has a total of <code>2*numSamples</code> values.
The underlying algorithm is used:
<pre>
realResult = 0;
imagResult = 0;
for (n = 0; n < numSamples; n++) {
realResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
imagResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_dot_prod
@{
*/
/**
@brief Floating-point complex dot product.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] numSamples number of samples in each vector
@param[out] realResult real part of the result returned here
@param[out] imagResult imaginary part of the result returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation of the f16 complex dot product.
 *
 * pSrcA / pSrcB hold numSamples interleaved (real, imag) pairs. Products are
 * accumulated per lane in a complex f16 vector accumulator which is reduced
 * to a single complex value at the end.
 */
void arm_cmplx_dot_prod_f16(
    const float16_t * pSrcA,
    const float16_t * pSrcB,
          uint32_t    numSamples,
          float16_t * realResult,
          float16_t * imagResult)
{
    uint32_t  blockSize = numSamples * CMPLX_DIM;  /* total number of f16 values per input */
    uint32_t  blkCnt;                              /* loop counter */
    float16_t real_sum, imag_sum;
    f16x8_t   vecSrcA, vecSrcB;
    f16x8_t   vec_acc = vdupq_n_f16(0.0f);

    /* Compute 4 complex samples (8 f16 values) at a time */
    blkCnt = blockSize >> 3U;
    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);
        vec_acc = vcmlaq(vec_acc, vecSrcA, vecSrcB);
        vec_acc = vcmlaq_rot90(vec_acc, vecSrcA, vecSrcB);
        /*
         * Decrement the blkCnt loop counter
         * Advance vector source and destination pointers
         */
        pSrcA += 8;
        pSrcB += 8;
        blkCnt--;
    }

    /* Tail: 0, 2, 4 or 6 f16 values are left */
    blkCnt = (blockSize & 7);
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /*
         * Predicated (zeroing) loads: only the lanes enabled by p0 are read,
         * so nothing beyond the end of the input buffers is accessed.
         */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);
        vec_acc = vcmlaq_m(vec_acc, vecSrcA, vecSrcB, p0);
        vec_acc = vcmlaq_rot90_m(vec_acc, vecSrcA, vecSrcB, p0);
    }

    /* Sum the partial parts */
    mve_cmplx_sum_intra_r_i_f16(vec_acc, real_sum, imag_sum);

    /*
     * Store the real and imaginary results in the destination buffers
     */
    *realResult = real_sum;
    *imagResult = imag_sum;
}
#else
/*
 * Portable C implementation of the f16 complex dot product.
 *
 * For every pair of complex samples (aRe + j aIm) and (bRe + j bIm) the four
 * partial products are accumulated one by one, in float16_t, into the real and
 * imaginary running sums. The order of the four accumulations is significant
 * for rounding and is kept identical in every section.
 */
void arm_cmplx_dot_prod_f16(
    const float16_t * pSrcA,
    const float16_t * pSrcB,
          uint32_t    numSamples,
          float16_t * realResult,
          float16_t * imagResult)
{
    uint32_t  remaining;                           /* complex samples still to process */
    float16_t accRe = 0.0f, accIm = 0.0f;          /* running sums */
    float16_t aRe, aIm, bRe, bIm;                  /* current operands */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t groups;                               /* groups of four complex samples */

    /* Unrolled section: four complex samples per pass */
    for (groups = numSamples >> 2U; groups > 0U; groups--)
    {
        aRe = pSrcA[0];
        aIm = pSrcA[1];
        bRe = pSrcB[0];
        bIm = pSrcB[1];
        accRe += aRe * bRe;
        accIm += aRe * bIm;
        accRe -= aIm * bIm;
        accIm += aIm * bRe;

        aRe = pSrcA[2];
        aIm = pSrcA[3];
        bRe = pSrcB[2];
        bIm = pSrcB[3];
        accRe += aRe * bRe;
        accIm += aRe * bIm;
        accRe -= aIm * bIm;
        accIm += aIm * bRe;

        aRe = pSrcA[4];
        aIm = pSrcA[5];
        bRe = pSrcB[4];
        bIm = pSrcB[5];
        accRe += aRe * bRe;
        accIm += aRe * bIm;
        accRe -= aIm * bIm;
        accIm += aIm * bRe;

        aRe = pSrcA[6];
        aIm = pSrcA[7];
        bRe = pSrcB[6];
        bIm = pSrcB[7];
        accRe += aRe * bRe;
        accIm += aRe * bIm;
        accRe -= aIm * bIm;
        accIm += aIm * bRe;

        pSrcA += 8;
        pSrcB += 8;
    }

    /* Samples that did not fill a group of four */
    remaining = numSamples & 0x3U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        aRe = pSrcA[0];
        aIm = pSrcA[1];
        bRe = pSrcB[0];
        bIm = pSrcB[1];
        accRe += aRe * bRe;
        accIm += aRe * bIm;
        accRe -= aIm * bIm;
        accIm += aIm * bRe;

        pSrcA += 2;
        pSrcB += 2;
    }

    /* Hand the accumulated sums back to the caller */
    *realResult = accRe;
    *imagResult = accIm;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of cmplx_dot_prod group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,239 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_f16.c
* Description: Floating-point complex magnitude
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_mag Complex Magnitude
Computes the magnitude of the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to the location where the result should be written.
<code>numSamples</code> specifies the number of complex samples
in the input array and the data is stored in an interleaved fashion
(real, imag, real, imag, ...).
The input array has a total of <code>2*numSamples</code> values;
the output array has a total of <code>numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_mag
@{
*/
/**
@brief Floating-point complex magnitude.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation of the f16 complex magnitude.
 *
 * pSrc holds numSamples interleaved (real, imag) pairs, pDst receives
 * numSamples magnitudes. The square root is computed as x * invSqrt(x) where
 * invSqrt comes from a bit-trick initial guess refined by Newton-Raphson
 * steps (3 steps for full vectors, 2 steps for the tail).
 *
 * NOTE(review): the tail uses one refinement step less than the main loop, so
 * the last (numSamples & 7) outputs may be slightly less accurate - confirm
 * this is intended.
 */
void arm_cmplx_mag_f16(
    const float16_t * pSrc,
          float16_t * pDst,
          uint32_t    numSamples)
{
    uint32_t  blkCnt;                              /* loop counter */
    f16x8x2_t vecSrc;
    f16x8_t   sum;

    /* Compute 8 complex samples (16 f16 values) at a time */
    blkCnt = numSamples >> 3U;
    while (blkCnt > 0U)
    {
        q15x8_t newtonStartVec;
        f16x8_t sumHalf, invSqrt;

        /* de-interleaving load: the two halves are squared and summed below */
        vecSrc = vld2q(pSrc);
        pSrc += 16;
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        /*
         * inlined Fast SQRT using inverse SQRT newton-raphson method
         */
        /* compute initial value */
        newtonStartVec = vdupq_n_s16(INVSQRT_MAGIC_F16) - vshrq((q15x8_t) sum, 1);
        sumHalf = vmulq(sum, (float16_t)0.5);
        /*
         * compute 3 x iterations
         *
         * The more iterations, the more accuracy.
         * If you need to trade a bit of accuracy for more performance,
         * you can comment out the 3rd use of the macro.
         */
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, (f16x8_t) newtonStartVec);
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
        /*
         * set negative values to 0
         */
        invSqrt = vdupq_m(invSqrt, (float16_t)0.0f, vcmpltq(invSqrt, (float16_t)0.0f));
        /*
         * sqrt(x) = x * invSqrt(x)
         */
        sum = vmulq(sum, invSqrt);
        vstrhq_f16(pDst, sum);
        pDst += 8;
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail: 1..7 complex samples
     */
    blkCnt = numSamples & 7U;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);
        q15x8_t      newtonStartVec;
        f16x8_t      sumHalf, invSqrt;
        float16_t    tailBuf[16];
        uint32_t     i;

        /*
         * vld2q has no predicated form and always reads 16 values, which would
         * run past the end of pSrc here. Stage the remaining samples in a
         * zero-padded local buffer and load from that instead.
         */
        for (i = 0U; i < (2U * blkCnt); i++)
        {
            tailBuf[i] = pSrc[i];
        }
        for (; i < 16U; i++)
        {
            tailBuf[i] = (float16_t)0.0f;
        }

        vecSrc = vld2q((float16_t const *)tailBuf);
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        /*
         * inlined Fast SQRT using inverse SQRT newton-raphson method
         */
        /* compute initial value */
        newtonStartVec = vdupq_n_s16(INVSQRT_MAGIC_F16) - vshrq((q15x8_t) sum, 1);
        sumHalf = vmulq(sum, (float16_t)0.5);
        /*
         * compute 2 x iterations
         */
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, (f16x8_t) newtonStartVec);
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
        /*
         * set negative values to 0
         */
        invSqrt = vdupq_m(invSqrt, (float16_t)0.0, vcmpltq(invSqrt, (float16_t)0.0));
        /*
         * sqrt(x) = x * invSqrt(x)
         */
        sum = vmulq(sum, invSqrt);
        /* only the first blkCnt lanes are written */
        vstrhq_p_f16(pDst, sum, p0);
    }
}
#else
/*
 * Portable C implementation of the f16 complex magnitude.
 *
 * For every interleaved (real, imag) pair of pSrc, writes
 * sqrt(real^2 + imag^2) to the next element of pDst using arm_sqrt_f16().
 * The status returned by arm_sqrt_f16() is ignored.
 */
void arm_cmplx_mag_f16(
    const float16_t * pSrc,
          float16_t * pDst,
          uint32_t    numSamples)
{
    uint32_t  remaining;                           /* complex samples still to process */
    float16_t re, im;                              /* current input sample */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t groups;                               /* groups of four complex samples */

    /* Unrolled section: four magnitudes per pass */
    for (groups = numSamples >> 2U; groups > 0U; groups--)
    {
        re = pSrc[0];
        im = pSrc[1];
        arm_sqrt_f16((re * re) + (im * im), &pDst[0]);

        re = pSrc[2];
        im = pSrc[3];
        arm_sqrt_f16((re * re) + (im * im), &pDst[1]);

        re = pSrc[4];
        im = pSrc[5];
        arm_sqrt_f16((re * re) + (im * im), &pDst[2]);

        re = pSrc[6];
        im = pSrc[7];
        arm_sqrt_f16((re * re) + (im * im), &pDst[3]);

        pSrc += 8;
        pDst += 4;
    }

    /* Samples that did not fill a group of four */
    remaining = numSamples & 0x3U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
        re = pSrc[0];
        im = pSrc[1];
        arm_sqrt_f16((re * re) + (im * im), pDst);

        pSrc += 2;
        pDst += 1;
    }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of cmplx_mag group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,172 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_squared_f16.c
* Description: Floating-point complex magnitude squared
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_mag_squared Complex Magnitude Squared
Computes the magnitude squared of the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to the location where the result should be written.
<code>numSamples</code> specifies the number of complex samples
in the input array and the data is stored in an interleaved fashion
(real, imag, real, imag, ...).
The input array has a total of <code>2*numSamples</code> values;
the output array has a total of <code>numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_mag_squared
@{
*/
/**
@brief Floating-point complex magnitude squared.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation of the f16 complex magnitude squared.
 *
 * pSrc holds numSamples interleaved (real, imag) pairs, pDst receives
 * numSamples values real^2 + imag^2. Full blocks of 8 complex samples are
 * processed directly; a final partial block is staged through a zero-padded
 * local buffer so that the de-interleaving load never reads past the end of
 * pSrc.
 */
void arm_cmplx_mag_squared_f16(
    const float16_t * pSrc,
          float16_t * pDst,
          uint32_t    numSamples)
{
    uint32_t  blkCnt;                              /* loop counter */
    f16x8x2_t vecSrc;
    f16x8_t   sum;

    /* Compute 8 complex samples (16 f16 values) at a time */
    blkCnt = numSamples >> 3U;
    while (blkCnt > 0U)
    {
        vecSrc = vld2q(pSrc);
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        vstrhq_f16(pDst, sum);

        pSrc += 16;
        pDst += 8;
        /*
         * Decrement the block loop counter
         */
        blkCnt--;
    }

    /* Tail: 1..7 complex samples */
    blkCnt = numSamples & 7U;
    if (blkCnt > 0U)
    {
        mve_pred16_t p = vctp16q(blkCnt);
        float16_t    tailBuf[16];
        uint32_t     i;

        /*
         * vld2q has no predicated form and always reads 16 values; copy the
         * remaining samples into a zero-padded buffer and load from there.
         */
        for (i = 0U; i < (2U * blkCnt); i++)
        {
            tailBuf[i] = pSrc[i];
        }
        for (; i < 16U; i++)
        {
            tailBuf[i] = (float16_t)0.0f;
        }

        vecSrc = vld2q((float16_t const *)tailBuf);
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        /* only the first blkCnt lanes are written */
        vstrhq_p_f16(pDst, sum, p);
    }
}
#else
/*
 * Portable C implementation of the f16 complex magnitude squared.
 *
 * For every interleaved (real, imag) pair of pSrc, writes real^2 + imag^2 to
 * the next element of pDst.
 */
void arm_cmplx_mag_squared_f16(
    const float16_t * pSrc,
          float16_t * pDst,
          uint32_t    numSamples)
{
    uint32_t  remaining;                           /* complex samples still to process */
    float16_t re, im;                              /* current input sample */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t groups;                               /* groups of four complex samples */

    /* Unrolled section: four outputs per pass */
    for (groups = numSamples >> 2U; groups > 0U; groups--)
    {
        re = pSrc[0];
        im = pSrc[1];
        pDst[0] = (re * re) + (im * im);

        re = pSrc[2];
        im = pSrc[3];
        pDst[1] = (re * re) + (im * im);

        re = pSrc[4];
        im = pSrc[5];
        pDst[2] = (re * re) + (im * im);

        re = pSrc[6];
        im = pSrc[7];
        pDst[3] = (re * re) + (im * im);

        pSrc += 8;
        pDst += 4;
    }

    /* Samples that did not fill a group of four */
    remaining = numSamples & 0x3U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
        re = pSrc[0];
        im = pSrc[1];
        pDst[0] = (re * re) + (im * im);

        pSrc += 2;
        pDst += 1;
    }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of cmplx_mag_squared group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,217 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_cmplx_f16.c
* Description: Floating-point complex-by-complex multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
Multiplies a complex vector by another complex vector and generates a complex result.
The data in the complex arrays is stored in an interleaved fashion
(real, imag, real, imag, ...).
The parameter <code>numSamples</code> represents the number of complex
samples processed. The complex arrays have a total of <code>2*numSamples</code>
real values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup CmplxByCmplxMult
@{
*/
/**
@brief Floating-point complex-by-complex multiplication.
@param[in] pSrcA points to first input vector
@param[in] pSrcB points to second input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation of the f16 complex-by-complex multiplication.
 *
 * pSrcA, pSrcB and pDst hold numSamples interleaved (real, imag) pairs.
 * Full vectors of 8 f16 values (4 complex samples) are processed with the
 * complex multiply intrinsics; the remaining 0..3 complex samples are handled
 * by a scalar loop.
 */
void arm_cmplx_mult_cmplx_f16(
    const float16_t * pSrcA,
    const float16_t * pSrcB,
          float16_t * pDst,
          uint32_t    numSamples)
{
    uint32_t  blkCnt;                              /* loop counter */
    f16x8_t   vecA;
    f16x8_t   vecB;
    f16x8_t   vecDst;
    float16_t a, b, c, d;                          /* Temporary variables to store real and imaginary values */

    /* One vector holds 8 f16 values, i.e. 4 complex samples */
    blkCnt = numSamples >> 2U;
    while (blkCnt > 0U)
    {
        vecA = vldrhq_f16(pSrcA);
        vecB = vldrhq_f16(pSrcB);
        /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
        vecDst = vcmulq(vecA, vecB);
        /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
        vecDst = vcmlaq_rot90(vecDst, vecA, vecB);
        vstrhq_f16(pDst, vecDst);

        blkCnt--;
        pSrcA += 8;
        pSrcB += 8;
        pDst += 8;
    }

    /*
     * Tail: complex samples left over after the groups of four.
     * numSamples counts complex samples, so the remainder is numSamples & 3
     * (deriving it from "& 7" and a shift skips or overruns samples).
     */
    blkCnt = numSamples & 3U;
    while (blkCnt > 0U)
    {
        /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
        /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
        a = *pSrcA++;
        b = *pSrcA++;
        c = *pSrcB++;
        d = *pSrcB++;
        /* store result in destination buffer. */
        *pDst++ = (a * c) - (b * d);
        *pDst++ = (a * d) + (b * c);
        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
/*
 * Portable C implementation of the f16 complex-by-complex multiplication.
 *
 * For every pair of complex samples (aRe + j aIm) from pSrcA and
 * (bRe + j bIm) from pSrcB, writes
 *     (aRe*bRe - aIm*bIm) + j (aRe*bIm + aIm*bRe)
 * to pDst. All four operands of a sample are read before its two results are
 * written.
 */
void arm_cmplx_mult_cmplx_f16(
    const float16_t * pSrcA,
    const float16_t * pSrcB,
          float16_t * pDst,
          uint32_t    numSamples)
{
    uint32_t  remaining;                           /* complex samples still to process */
    float16_t aRe, aIm, bRe, bIm;                  /* current operands */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t groups;                               /* groups of four complex samples */

    /* Unrolled section: four complex products per pass */
    for (groups = numSamples >> 2U; groups > 0U; groups--)
    {
        aRe = pSrcA[0];
        aIm = pSrcA[1];
        bRe = pSrcB[0];
        bIm = pSrcB[1];
        pDst[0] = (aRe * bRe) - (aIm * bIm);
        pDst[1] = (aRe * bIm) + (aIm * bRe);

        aRe = pSrcA[2];
        aIm = pSrcA[3];
        bRe = pSrcB[2];
        bIm = pSrcB[3];
        pDst[2] = (aRe * bRe) - (aIm * bIm);
        pDst[3] = (aRe * bIm) + (aIm * bRe);

        aRe = pSrcA[4];
        aIm = pSrcA[5];
        bRe = pSrcB[4];
        bIm = pSrcB[5];
        pDst[4] = (aRe * bRe) - (aIm * bIm);
        pDst[5] = (aRe * bIm) + (aIm * bRe);

        aRe = pSrcA[6];
        aIm = pSrcA[7];
        bRe = pSrcB[6];
        bIm = pSrcB[7];
        pDst[6] = (aRe * bRe) - (aIm * bIm);
        pDst[7] = (aRe * bIm) + (aIm * bRe);

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;
    }

    /* Samples that did not fill a group of four */
    remaining = numSamples & 0x3U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        aRe = pSrcA[0];
        aIm = pSrcA[1];
        bRe = pSrcB[0];
        bIm = pSrcB[1];
        pDst[0] = (aRe * bRe) - (aIm * bIm);
        pDst[1] = (aRe * bIm) + (aIm * bRe);

        pSrcA += 2;
        pSrcB += 2;
        pDst  += 2;
    }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of CmplxByCmplxMult group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,192 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_real_f16.c
* Description: Floating-point complex by real multiplication
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup CmplxByRealMult Complex-by-Real Multiplication
Multiplies a complex vector by a real vector and generates a complex result.
The data in the complex arrays is stored in an interleaved fashion
(real, imag, real, imag, ...).
The parameter <code>numSamples</code> represents the number of complex
samples processed. The complex arrays have a total of <code>2*numSamples</code>
real values while the real array has a total of <code>numSamples</code>
real values.
The following algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup CmplxByRealMult
@{
*/
/**
@brief Half-precision floating-point complex-by-real multiplication.
@param[in] pSrcCmplx points to complex input vector
@param[in] pSrcReal points to real input vector
@param[out] pCmplxDst points to complex output vector
@param[in] numSamples number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
{
    /*
     * Element offsets used by the gather load: each real value is fetched
     * twice so that it lines up with the (re, im) pair it has to scale.
     */
    static const uint16_t stride_cmplx_x_real_16[8] = {
        0, 0, 1, 1, 2, 2, 3, 3
    };
    /* Number of scalar f16 values in the complex vectors (CMPLX_DIM scalars per sample) */
    uint32_t blockSizeC = numSamples * CMPLX_DIM;
    uint32_t blkCnt;                  /* Loop counter */
    f16x8_t rVec;                     /* Real factors, each one duplicated */
    f16x8_t cmplxVec;                 /* Interleaved complex input */
    f16x8_t dstVec;                   /* Interleaved complex result */
    uint16x8_t strideVec;

    /* stride vector for pairs of real generation */
    strideVec = vld1q(stride_cmplx_x_real_16);

    /* Main loop: 8 scalars (4 complex samples) and 4 reals per iteration */
    blkCnt = blockSizeC >> 3;
    while (blkCnt > 0U)
    {
        cmplxVec = vld1q(pSrcCmplx);
        rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec);
        dstVec = vmulq(cmplxVec, rVec);
        vst1q(pCmplxDst, dstVec);

        pSrcReal += 4;
        pSrcCmplx += 8;
        pCmplxDst += 8;
        blkCnt--;
    }

    /* Tail: fewer than 8 scalars remain */
    blkCnt = blockSizeC & 7;
    if (blkCnt > 0U) {
        /* Predicate enabling only the first blkCnt lanes */
        mve_pred16_t p0 = vctp16q(blkCnt);

        /*
         * Both loads are predicated (inactive lanes read as zero) so that
         * nothing is fetched past the end of pSrcCmplx or pSrcReal; the
         * store was already predicated with the same mask.
         */
        cmplxVec = vld1q_z_f16(pSrcCmplx, p0);
        rVec = vldrhq_gather_shifted_offset_z_f16(pSrcReal, strideVec, p0);
        dstVec = vmulq(cmplxVec, rVec);
        vstrhq_p_f16(pCmplxDst, dstVec, p0);
    }
}
#else
void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
{
    /*
     * Scale every interleaved (re, im) pair by the matching real value:
     *   pCmplxDst[2n]   = pSrcCmplx[2n]   * pSrcReal[n]
     *   pCmplxDst[2n+1] = pSrcCmplx[2n+1] * pSrcReal[n]
     */
    uint32_t remaining;   /* Complex samples still to process */
    float16_t scale;      /* Real factor applied to the current pair */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unrolled section: four complex samples per pass */
    for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
    {
        scale = pSrcReal[0];
        pCmplxDst[0] = pSrcCmplx[0] * scale;
        pCmplxDst[1] = pSrcCmplx[1] * scale;

        scale = pSrcReal[1];
        pCmplxDst[2] = pSrcCmplx[2] * scale;
        pCmplxDst[3] = pSrcCmplx[3] * scale;

        scale = pSrcReal[2];
        pCmplxDst[4] = pSrcCmplx[4] * scale;
        pCmplxDst[5] = pSrcCmplx[5] * scale;

        scale = pSrcReal[3];
        pCmplxDst[6] = pSrcCmplx[6] * scale;
        pCmplxDst[7] = pSrcCmplx[7] * scale;

        /* 4 reals and 4 complex samples (8 scalars) consumed */
        pSrcReal  += 4;
        pSrcCmplx += 8;
        pCmplxDst += 8;
    }

    /* Left-over samples that did not fill a group of four */
    remaining = numSamples % 4U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        scale = pSrcReal[0];
        pCmplxDst[0] = pSrcCmplx[0] * scale;
        pCmplxDst[1] = pSrcCmplx[1] * scale;

        pSrcReal  += 1;
        pSrcCmplx += 2;
        pCmplxDst += 2;
    }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of CmplxByRealMult group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -327,6 +327,7 @@ set(TESTSRC
if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL)))
set(TESTSRC16
Source/Tests/BasicTestsF16.cpp
Source/Tests/ComplexTestsF16.cpp
Source/Tests/TransformCF16.cpp
Source/Tests/TransformRF16.cpp
)

@ -0,0 +1,21 @@
#include "Test.h"
#include "Pattern.h"
#include "dsp/complex_math_functions_f16.h"
// Test suite (a Client::Suite) built on float16_t patterns; it is used to
// exercise the functions declared in dsp/complex_math_functions_f16.h.
class ComplexTestsF16:public Client::Suite
{
public:
ComplexTestsF16(Testing::testID_t id);
// Hook receiving the test ID, the test parameters and the pattern manager
virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr);
// Hook receiving the test ID and the pattern manager
virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
private:
// Pulled in inside the class body; presumably generated and declaring the
// test methods and the test/pattern IDs of this suite -- TODO confirm
#include "ComplexTestsF16_decl.h"
// Input patterns
Client::Pattern<float16_t> input1;
Client::Pattern<float16_t> input2;
// Buffer receiving the results of the function under test
Client::LocalPattern<float16_t> output;
// Reference patterns are not loaded when we are in dump mode
Client::RefPattern<float16_t> ref;
};

@ -105,11 +105,13 @@ def generatePatterns():
PARAMDIR = os.path.join("Parameters","DSP","ComplexMaths","ComplexMaths")
configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32")
configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16")
configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31")
configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15")
writeTests(configf32,0)
writeTests(configf16,16)
writeTests(configq31,31)
writeTests(configq15,15)

@ -0,0 +1,514 @@
H
256
// 0.606399
0x38da
// 0.073125
0x2cae
// -0.140834
0xb082
// -0.900755
0xbb35
// 0.148560
0x30c1
// -0.261878
0xb431
// -0.545170
0xb85d
// -0.628326
0xb907
// 0.495511
0x37ee
// 0.063174
0x2c0b
// -0.169679
0xb16e
// 0.145953
0x30ac
// 0.496673
0x37f2
// 1.000000
0x3c00
// 0.131807
0x3038
// 0.036478
0x28ab
// 0.427452
0x36d7
// -0.526852
0xb837
// -0.153413
0xb0e9
// -0.180253
0xb1c5
// 0.192358
0x3228
// 0.534224
0x3846
// -0.143569
0xb098
// -0.378102
0xb60d
// -0.387182
0xb632
// -0.181926
0xb1d2
// 0.627082
0x3904
// -0.782546
0xba43
// 0.227872
0x334b
// -0.920057
0xbb5c
// 0.028790
0x275f
// -0.002111
0x9853
// -0.448033
0xb72b
// 0.259638
0x3427
// -0.284893
0xb48f
// -0.172468
0xb185
// 0.304282
0x34de
// 0.130491
0x302d
// -0.206776
0xb29e
// 0.384068
0x3625
// 0.208473
0x32ac
// -0.097562
0xae3e
// 0.121821
0x2fcc
// -0.611990
0xb8e5
// -0.062335
0xabfb
// 0.614710
0x38eb
// 0.438926
0x3706
// 0.195912
0x3245
// 0.081082
0x2d30
// 0.012720
0x2283
// 0.258657
0x3423
// -0.268801
0xb44d
// -0.183050
0xb1dc
// 0.087615
0x2d9b
// -0.427434
0xb6d7
// 0.301905
0x34d5
// -0.236141
0xb38e
// 0.188230
0x3206
// -0.089373
0xadb8
// -0.294443
0xb4b6
// 0.506253
0x380d
// 0.841140
0x3abb
// 0.104046
0x2ea9
// 0.285712
0x3492
// -0.725796
0xb9ce
// 0.649891
0x3933
// 0.663543
0x394f
// -0.062934
0xac07
// -0.387710
0xb634
// 0.531890
0x3841
// 0.277675
0x3471
// 0.319026
0x351b
// 0.072518
0x2ca4
// 0.637432
0x3919
// -0.316837
0xb512
// 0.031885
0x2815
// -0.109017
0xaefa
// -0.531561
0xb841
// -0.116513
0xaf75
// 0.005209
0x1d56
// -0.178215
0xb1b4
// 0.096452
0x2e2c
// -0.089155
0xadb5
// -0.193966
0xb235
// -0.318435
0xb518
// -0.137340
0xb065
// 0.325371
0x3535
// -0.413025
0xb69c
// -0.007530
0x9fb6
// -0.011499
0xa1e3
// -0.359144
0xb5bf
// -0.317911
0xb516
// 0.018568
0x24c1
// -0.366742
0xb5de
// 0.221775
0x3319
// 0.420017
0x36b8
// -0.040939
0xa93d
// -0.362740
0xb5ce
// 0.471219
0x378a
// -0.099621
0xae60
// 0.326052
0x3538
// 0.311047
0x34fa
// 0.156421
0x3101
// -0.068640
0xac65
// -0.095922
0xae24
// -0.013959
0xa326
// 0.607825
0x38dd
// 0.093427
0x2dfb
// 0.329927
0x3547
// 0.084851
0x2d6e
// 0.726006
0x39cf
// 0.248801
0x33f6
// 0.620044
0x38f6
// 0.164699
0x3145
// 0.045769
0x29dc
// 0.264445
0x343b
// -0.238087
0xb39e
// 0.220883
0x3311
// 0.018551
0x24c0
// -0.057134
0xab50
// -0.155357
0xb0f9
// 0.038525
0x28ee
// 0.208492
0x32ac
// -0.098819
0xae53
// 0.027140
0x26f3
// -0.333135
0xb555
// -0.423594
0xb6c7
// 0.521231
0x382b
// 0.406043
0x367f
// -0.457325
0xb751
// 0.089960
0x2dc2
// -0.107212
0xaedd
// 0.089652
0x2dbd
// -0.269460
0xb450
// 0.155036
0x30f6
// 0.024048
0x2628
// 0.220735
0x3310
// 0.032031
0x281a
// -0.567049
0xb889
// 0.145897
0x30ab
// -0.094783
0xae11
// 0.319032
0x351b
// -0.091891
0xade2
// 0.416962
0x36ac
// 0.093970
0x2e04
// 0.564895
0x3885
// -0.296964
0xb4c0
// -0.209322
0xb2b3
// 0.265009
0x343d
// 0.093215
0x2df7
// 0.622832
0x38fc
// -0.085788
0xad7e
// 0.670554
0x395d
// 0.032468
0x2828
// 0.118023
0x2f8e
// -0.269207
0xb44f
// 0.217617
0x32f7
// 0.213691
0x32d7
// 0.439040
0x3706
// 0.241885
0x33be
// -0.424515
0xb6cb
// 0.352380
0x35a3
// 0.588583
0x38b5
// -0.264797
0xb43d
// 0.329184
0x3544
// 0.034001
0x285a
// -0.423064
0xb6c5
// -0.608316
0xb8de
// -0.338928
0xb56c
// 0.419995
0x36b8
// 0.200555
0x326b
// 0.329638
0x3546
// -0.294240
0xb4b5
// -0.897858
0xbb2f
// 0.160219
0x3121
// 0.131756
0x3037
// 0.206411
0x329b
// 0.109237
0x2efe
// -0.367268
0xb5e0
// 0.292430
0x34ae
// -0.414400
0xb6a1
// -0.642448
0xb924
// 0.238399
0x33a1
// 0.090387
0x2dc9
// -0.512754
0xb81a
// 0.301373
0x34d2
// -0.466867
0xb778
// 0.204287
0x328a
// -0.229499
0xb358
// -0.119896
0xafac
// 0.440248
0x370b
// 0.649995
0x3933
// 0.129477
0x3025
// 0.241037
0x33b7
// -0.411964
0xb697
// 0.228133
0x334d
// 0.942283
0x3b8a
// -0.390976
0xb641
// 0.182779
0x31d9
// 0.228995
0x3354
// 0.126382
0x300b
// 0.225140
0x3334
// -0.214251
0xb2db
// 0.439711
0x3709
// -0.638072
0xb91b
// -0.667301
0xb957
// -0.353387
0xb5a7
// 0.329438
0x3545
// -0.543036
0xb858
// -0.195706
0xb243
// -0.000314
0x8d26
// -0.346311
0xb58a
// -0.040030
0xa920
// 0.309919
0x34f5
// 0.214685
0x32df
// -0.256227
0xb41a
// 0.256241
0x341a
// 0.423187
0x36c5
// -0.070894
0xac8a
// -0.408192
0xb688
// 0.258732
0x3424
// 0.743039
0x39f2
// -0.328534
0xb542
// -0.502412
0xb805
// -0.550943
0xb868
// 0.461636
0x3763
// -0.098335
0xae4b
// -0.331961
0xb550
// 0.502005
0x3804
// -0.060550
0xabc0
// -0.218616
0xb2ff
// 0.206607
0x329d
// 0.509390
0x3813
// 0.331278
0x354d
// -0.143708
0xb099
// 0.008236
0x2038
// -0.256486
0xb41b
// -0.154828
0xb0f4
// -0.606731
0xb8db
// 0.043363
0x298d
// 0.416313
0x36a9
// 0.132691
0x303f
// 0.716789
0x39bc
// 0.827380
0x3a9e
// 0.109746
0x2f06
// 0.480993
0x37b2
// -0.424777
0xb6cc
// -0.169704
0xb16e
// -0.095902
0xae23
// 0.022081
0x25a7
// -0.227175
0xb345
// 0.382023
0x361d
// 0.316215
0x350f
// -0.027787
0xa71d
// 0.107868
0x2ee7
// -0.091834
0xade1

@ -0,0 +1,6 @@
H
2
// -0.584459
0xb8ad
// 0.027514
0x270b

@ -0,0 +1,6 @@
H
2
// -1.063155
0xbc41
// 0.204536
0x328c

@ -0,0 +1,6 @@
H
2
// -2.020148
0xc00a
// -0.083691
0xad5b

@ -0,0 +1,514 @@
H
256
// 0.211657
0x32c6
// 0.164180
0x3141
// 0.394092
0x364e
// 0.837031
0x3ab2
// 0.203042
0x327f
// 0.594290
0x38c1
// 0.425438
0x36cf
// 0.692208
0x398a
// 0.478904
0x37aa
// 0.731179
0x39d9
// 0.228972
0x3354
// 0.423292
0x36c6
// 0.687425
0x3980
// 0.637676
0x391a
// 0.400567
0x3669
// 0.284900
0x348f
// 0.800826
0x3a68
// 0.530918
0x383f
// 0.899030
0x3b31
// 0.067538
0x2c53
// 0.418694
0x36b3
// 0.422016
0x36c1
// 0.600117
0x38cd
// 0.096654
0x2e30
// 0.382849
0x3620
// 0.553403
0x386d
// 0.109789
0x2f07
// 0.740600
0x39ed
// 0.830944
0x3aa6
// 0.426594
0x36d3
// 0.281352
0x3480
// 0.270642
0x3455
// 0.354207
0x35ab
// 1.046783
0x3c30
// 0.937881
0x3b81
// 0.476101
0x379e
// 0.369946
0x35eb
// 0.334837
0x355b
// 0.319491
0x351d
// 0.119662
0x2fa9
// 0.664848
0x3952
// 0.641369
0x3922
// 0.414986
0x36a4
// 0.824391
0x3a98
// 0.404576
0x3679
// 0.283363
0x3489
// 0.467747
0x377c
// 0.467828
0x377c
// 0.140330
0x307e
// 0.402280
0x3670
// 1.003391
0x3c03
// 0.144475
0x30a0
// 0.461048
0x3760
// 0.468613
0x377f
// 0.609266
0x38e0
// 0.560596
0x387c
// 0.410914
0x3693
// 0.196930
0x324d
// 0.256623
0x341b
// 0.435163
0x36f6
// 0.324492
0x3531
// 0.371618
0x35f2
// 0.218667
0x32ff
// 0.324592
0x3532
// 0.095539
0x2e1d
// 0.268452
0x344c
// 0.399025
0x3662
// 0.410560
0x3692
// 0.535278
0x3848
// 0.300637
0x34cf
// 0.413966
0x36a0
// 0.263911
0x3439
// 0.227363
0x3347
// 0.245900
0x33de
// 0.444928
0x371e
// 0.486777
0x37ca
// 0.388128
0x3636
// 0.303929
0x34dd
// 0.320661
0x3521
// 0.225175
0x3335
// 0.817364
0x3a8a
// 0.479373
0x37ac
// 0.857010
0x3adb
// 0.356348
0x35b4
// 0.134285
0x304c
// 0.324278
0x3530
// 0.568790
0x388d
// 0.302070
0x34d5
// 0.224479
0x332f
// 0.394179
0x364f
// 0.577446
0x389f
// 0.250274
0x3401
// 0.531677
0x3841
// 0.424735
0x36cc
// 0.653466
0x393a
// 0.316197
0x350f
// 0.227789
0x334a
// 0.184250
0x31e5
// 0.090642
0x2dcd
// 0.462727
0x3767
// 0.344118
0x3582
// 0.245310
0x33da
// 0.100303
0x2e6b
// 0.447675
0x372a
// 0.691568
0x3988
// 0.363492
0x35d1
// 0.459596
0x375b
// 0.559206
0x3879
// 0.119232
0x2fa1
// 0.691377
0x3988
// 0.423896
0x36c8
// 0.092387
0x2dea
// 0.016503
0x243a
// 0.389855
0x363d
// 0.310975
0x34fa
// 0.508563
0x3812
// 0.446708
0x3726
// 0.292022
0x34ac
// 0.280445
0x347d
// 0.371688
0x35f2
// 0.231185
0x3366
// 0.468586
0x377f
// 0.037421
0x28ca
// 0.483864
0x37be
// 0.434319
0x36f3
// 0.164206
0x3141
// 0.584744
0x38ae
// 0.264999
0x343d
// 0.371643
0x35f2
// 0.662050
0x394c
// 0.566702
0x3889
// 0.496748
0x37f3
// 0.529369
0x383c
// 0.397883
0x365e
// 0.503458
0x3807
// 0.154150
0x30ef
// 0.435808
0x36f9
// 0.939636
0x3b84
// 0.347287
0x358e
// 0.202447
0x327a
// 0.834662
0x3aad
// 0.522830
0x382f
// 0.539743
0x3851
// 0.390797
0x3641
// 0.269937
0x3452
// 0.528511
0x383a
// 0.142120
0x308c
// 0.507329
0x380f
// 0.206810
0x329e
// 0.793935
0x3a5a
// 0.835657
0x3aaf
// 0.670280
0x395d
// 0.113984
0x2f4c
// 0.472458
0x378f
// 0.575082
0x389a
// 0.732740
0x39dd
// 0.591795
0x38bc
// 0.546775
0x3860
// 0.296527
0x34bf
// 0.261418
0x342f
// 0.571263
0x3892
// 0.589192
0x38b7
// 0.822382
0x3a94
// 0.361338
0x35c8
// 0.153735
0x30eb
// 0.466681
0x3778
// 0.393489
0x364c
// 0.721908
0x39c6
// 0.086062
0x2d82
// 0.331373
0x354d
// 0.621595
0x38f9
// 0.516218
0x3821
// 0.771446
0x3a2c
// 0.420211
0x36b9
// 0.422246
0x36c2
// 0.338301
0x356a
// 0.745863
0x39f8
// 0.650429
0x3934
// 0.550667
0x3868
// 0.412579
0x369a
// 0.288576
0x349e
// 0.353537
0x35a8
// 0.225252
0x3335
// 0.194284
0x3238
// 0.547930
0x3862
// 0.300970
0x34d1
// 0.504146
0x3808
// 1.026896
0x3c1c
// 0.418343
0x36b2
// 0.627205
0x3905
// 0.230780
0x3363
// 0.145178
0x30a5
// 0.384564
0x3627
// 0.278455
0x3475
// 0.341747
0x3578
// 0.448953
0x372f
// 0.233506
0x3379
// 0.229044
0x3354
// 0.261151
0x342e
// 0.582315
0x38a9
// 0.214435
0x32dd
// 0.195282
0x3240
// 0.422011
0x36c1
// 1.204601
0x3cd2
// 0.681809
0x3974
// 0.108645
0x2ef4
// 0.054477
0x2af9
// 0.269794
0x3451
// 0.138462
0x306e
// 0.381523
0x361b
// 0.225135
0x3334
// 0.204132
0x3288
// 0.285641
0x3492
// 0.521935
0x382d
// 0.670786
0x395e
// 0.601863
0x38d1
// 0.117244
0x2f81
// 0.804322
0x3a6f
// 0.522114
0x382d
// 0.456285
0x374d
// 0.325889
0x3537
// 0.052211
0x2aaf
// 0.471914
0x378d
// 0.620330
0x38f6
// 0.183729
0x31e1
// 0.612658
0x38e7
// 0.364133
0x35d3
// 0.520880
0x382b
// 0.693829
0x398d
// 0.146768
0x30b2
// 0.350727
0x359d
// 0.572416
0x3894
// 0.145950
0x30ac
// 0.659216
0x3946
// 0.232441
0x3370
// 0.365908
0x35db
// 0.533640
0x3845
// 0.578456
0x38a1
// 0.166925
0x3157
// 0.439703
0x3709
// 0.427486
0x36d7
// 0.461784
0x3763
// 0.292177
0x34ad
// 0.461312
0x3762
// 0.543533
0x3859
// 0.435610
0x36f8
// 0.237356
0x3398
// 0.366056
0x35db
// 0.196488
0x324a
// 0.022935
0x25df
// 0.359284
0x35c0
// 0.409479
0x368d
// 0.228371
0x334f
// 0.124286
0x2ff4
// 0.434761
0x36f5
// 0.629995
0x390a

@ -0,0 +1,514 @@
H
256
// 0.044799
0x29bc
// 0.026955
0x26e7
// 0.155308
0x30f8
// 0.700622
0x399b
// 0.041226
0x2947
// 0.353181
0x35a7
// 0.180997
0x31cb
// 0.479153
0x37ab
// 0.229349
0x3357
// 0.534623
0x3847
// 0.052428
0x2ab6
// 0.179176
0x31bc
// 0.472553
0x3790
// 0.406630
0x3682
// 0.160454
0x3122
// 0.081168
0x2d32
// 0.641322
0x3921
// 0.281874
0x3483
// 0.808254
0x3a77
// 0.004561
0x1cac
// 0.175304
0x319c
// 0.178098
0x31b3
// 0.360140
0x35c3
// 0.009342
0x20c8
// 0.146573
0x30b1
// 0.306255
0x34e6
// 0.012054
0x222c
// 0.548488
0x3863
// 0.690467
0x3986
// 0.181983
0x31d3
// 0.079159
0x2d11
// 0.073247
0x2cb0
// 0.125462
0x3004
// 1.095754
0x3c62
// 0.879620
0x3b09
// 0.226672
0x3341
// 0.136860
0x3061
// 0.112116
0x2f2d
// 0.102075
0x2e88
// 0.014319
0x2355
// 0.442023
0x3713
// 0.411355
0x3695
// 0.172214
0x3183
// 0.679620
0x3970
// 0.163681
0x313d
// 0.080295
0x2d24
// 0.218787
0x3300
// 0.218863
0x3301
// 0.019693
0x250b
// 0.161829
0x312e
// 1.006794
0x3c07
// 0.020873
0x2558
// 0.212565
0x32cd
// 0.219598
0x3307
// 0.371205
0x35f0
// 0.314268
0x3507
// 0.168850
0x3167
// 0.038782
0x28f7
// 0.065855
0x2c37
// 0.189367
0x320f
// 0.105295
0x2ebd
// 0.138100
0x306b
// 0.047815
0x2a1f
// 0.105360
0x2ebe
// 0.009128
0x20ac
// 0.072067
0x2c9d
// 0.159221
0x3118
// 0.168559
0x3165
// 0.286522
0x3496
// 0.090382
0x2dc9
// 0.171368
0x317c
// 0.069649
0x2c75
// 0.051694
0x2a9e
// 0.060467
0x2bbd
// 0.197961
0x3256
// 0.236952
0x3395
// 0.150644
0x30d2
// 0.092373
0x2de9
// 0.102823
0x2e95
// 0.050704
0x2a7d
// 0.668084
0x3958
// 0.229798
0x335b
// 0.734466
0x39e0
// 0.126984
0x3010
// 0.018033
0x249e
// 0.105157
0x2ebb
// 0.323522
0x352d
// 0.091246
0x2dd7
// 0.050391
0x2a73
// 0.155377
0x30f9
// 0.333444
0x3556
// 0.062637
0x2c02
// 0.282681
0x3486
// 0.180400
0x31c6
// 0.427018
0x36d5
// 0.099980
0x2e66
// 0.051888
0x2aa4
// 0.033948
0x2858
// 0.008216
0x2035
// 0.214116
0x32da
// 0.118417
0x2f94
// 0.060177
0x2bb4
// 0.010061
0x2127
// 0.200412
0x326a
// 0.478267
0x37a7
// 0.132127
0x303a
// 0.211228
0x32c2
// 0.312711
0x3501
// 0.014216
0x2347
// 0.478003
0x37a6
// 0.179688
0x31c0
// 0.008535
0x205f
// 0.000272
0xc76
// 0.151987
0x30dd
// 0.096705
0x2e30
// 0.258636
0x3423
// 0.199548
0x3263
// 0.085277
0x2d75
// 0.078650
0x2d09
// 0.138152
0x306c
// 0.053447
0x2ad7
// 0.219573
0x3307
// 0.001400
0x15bc
// 0.234124
0x337e
// 0.188633
0x3209
// 0.026963
0x26e7
// 0.341926
0x3579
// 0.070225
0x2c7f
// 0.138118
0x306b
// 0.438310
0x3703
// 0.321151
0x3523
// 0.246759
0x33e5
// 0.280232
0x347c
// 0.158311
0x3111
// 0.253470
0x340e
// 0.023762
0x2615
// 0.189928
0x3214
// 0.882917
0x3b10
// 0.120608
0x2fb8
// 0.040985
0x293f
// 0.696661
0x3993
// 0.273351
0x3460
// 0.291322
0x34a9
// 0.152722
0x30e3
// 0.072866
0x2caa
// 0.279324
0x3478
// 0.020198
0x252c
// 0.257383
0x341e
// 0.042770
0x2979
// 0.630333
0x390b
// 0.698322
0x3996
// 0.449275
0x3730
// 0.012992
0x22a7
// 0.223217
0x3325
// 0.330719
0x354b
// 0.536907
0x384c
// 0.350221
0x359b
// 0.298963
0x34c9
// 0.087928
0x2da1
// 0.068340
0x2c60
// 0.326342
0x3539
// 0.347147
0x358e
// 0.676312
0x3969
// 0.130565
0x302e
// 0.023634
0x260d
// 0.217791
0x32f8
// 0.154834
0x30f4
// 0.521151
0x382b
// 0.007407
0x1f96
// 0.109808
0x2f07
// 0.386380
0x362f
// 0.266481
0x3444
// 0.595129
0x38c3
// 0.176577
0x31a7
// 0.178291
0x31b5
// 0.114447
0x2f53
// 0.556312
0x3873
// 0.423058
0x36c5
// 0.303234
0x34da
// 0.170221
0x3172
// 0.083276
0x2d54
// 0.124988
0x3000
// 0.050738
0x2a7f
// 0.037746
0x28d5
// 0.300227
0x34ce
// 0.090583
0x2dcc
// 0.254164
0x3411
// 1.054516
0x3c38
// 0.175011
0x319a
// 0.393386
0x364b
// 0.053260
0x2ad1
// 0.021077
0x2565
// 0.147889
0x30bc
// 0.077537
0x2cf6
// 0.116791
0x2f7a
// 0.201558
0x3273
// 0.054525
0x2afb
// 0.052461
0x2ab7
// 0.068200
0x2c5d
// 0.339091
0x356d
// 0.045982
0x29e3
// 0.038135
0x28e2
// 0.178093
0x31b3
// 1.451062
0x3dce
// 0.464864
0x3770
// 0.011804
0x220b
// 0.002968
0x1a14
// 0.072789
0x2ca9
// 0.019172
0x24e8
// 0.145560
0x30a8
// 0.050686
0x2a7d
// 0.041670
0x2955
// 0.081591
0x2d39
// 0.272416
0x345c
// 0.449953
0x3733
// 0.362239
0x35cc
// 0.013746
0x230a
// 0.646934
0x392d
// 0.272603
0x345d
// 0.208196
0x32aa
// 0.106203
0x2ecc
// 0.002726
0x1995
// 0.222703
0x3320
// 0.384809
0x3628
// 0.033756
0x2852
// 0.375349
0x3601
// 0.132593
0x303e
// 0.271316
0x3457
// 0.481399
0x37b4
// 0.021541
0x2584
// 0.123010
0x2fdf
// 0.327660
0x353e
// 0.021302
0x2574
// 0.434565
0x36f4
// 0.054029
0x2aea
// 0.133889
0x3049
// 0.284771
0x348e
// 0.334612
0x355b
// 0.027864
0x2722
// 0.193338
0x3230
// 0.182744
0x31d9
// 0.213245
0x32d3
// 0.085367
0x2d77
// 0.212809
0x32cf
// 0.295428
0x34ba
// 0.189756
0x3212
// 0.056338
0x2b36
// 0.133997
0x304a
// 0.038607
0x28f1
// 0.000526
0x104f
// 0.129085
0x3021
// 0.167673
0x315e
// 0.052153
0x2aad
// 0.015447
0x23e9
// 0.189017
0x320c
// 0.396894
0x365a

@ -0,0 +1,6 @@
H
2
// -0.368091
0xb5e4
// -0.861249
0xbae4

@ -0,0 +1,308 @@
#include "ComplexTestsF16.h"
#include <stdio.h>
#include "Error.h"
#define SNR_THRESHOLD 40
#define REL_ERROR (6.0e-2)
void ComplexTestsF16::test_cmplx_conj_f16()
{
const float16_t *inp1=input1.ptr();
float16_t *outp=output.ptr();
arm_cmplx_conj_f16(inp1,outp,input1.nbSamples() >> 1 );
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
}
void ComplexTestsF16::test_cmplx_dot_prod_f16()
{
float16_t re,im;
const float16_t *inp1=input1.ptr();
const float16_t *inp2=input2.ptr();
float16_t *outp=output.ptr();
arm_cmplx_dot_prod_f16(inp1,inp2,input1.nbSamples() >> 1,&re,&im);
outp[0] = re;
outp[1] = im;
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
ASSERT_EMPTY_TAIL(output);
}
void ComplexTestsF16::test_cmplx_mag_f16()
{
const float16_t *inp1=input1.ptr();
float16_t *outp=output.ptr();
arm_cmplx_mag_f16(inp1,outp,input1.nbSamples() >> 1 );
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
}
void ComplexTestsF16::test_cmplx_mag_squared_f16()
{
const float16_t *inp1=input1.ptr();
float16_t *outp=output.ptr();
arm_cmplx_mag_squared_f16(inp1,outp,input1.nbSamples() >> 1 );
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
}
void ComplexTestsF16::test_cmplx_mult_cmplx_f16()
{
const float16_t *inp1=input1.ptr();
const float16_t *inp2=input2.ptr();
float16_t *outp=output.ptr();
arm_cmplx_mult_cmplx_f16(inp1,inp2,outp,input1.nbSamples() >> 1 );
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
}
void ComplexTestsF16::test_cmplx_mult_real_f16()
{
const float16_t *inp1=input1.ptr();
const float16_t *inp2=input2.ptr();
float16_t *outp=output.ptr();
arm_cmplx_mult_real_f16(inp1,inp2,outp,input1.nbSamples() >> 1 );
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
}
/*
 * Prepares the patterns needed by the test identified by `id`:
 * reloads the reference and input patterns and creates the output buffer
 * with as many samples as the reference. `params` is unused.
 * `nb` is the number of complex samples; interleaved complex patterns are
 * requested with `nb << 1` values. Test IDs not listed in the switch leave
 * all patterns untouched.
 */
void ComplexTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
{
/* Default length, only kept by the last group of cases, which do not override nb */
Testing::nbSamples_t nb=MAX_NB_SAMPLES;
(void)params;
switch(id)
{
/* arm_cmplx_conj_f16 with 7, 16 and 23 complex samples */
case ComplexTestsF16::TEST_CMPLX_CONJ_F16_1:
nb = 7;
ref.reload(ComplexTestsF16::REF_CONJ_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_CONJ_F16_2:
nb = 16;
ref.reload(ComplexTestsF16::REF_CONJ_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_CONJ_F16_3:
nb = 23;
ref.reload(ComplexTestsF16::REF_CONJ_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
/* arm_cmplx_dot_prod_f16: one dedicated reference pattern per length,
   reloaded without an explicit sample count */
case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_4:
nb = 7;
ref.reload(ComplexTestsF16::REF_DOT_PROD_3_F16_ID,mgr);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_5:
nb = 16;
ref.reload(ComplexTestsF16::REF_DOT_PROD_4N_F16_ID,mgr);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_6:
nb = 23;
ref.reload(ComplexTestsF16::REF_DOT_PROD_4N1_F16_ID,mgr);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
/* arm_cmplx_mag_f16: the reference is real valued, hence nb (not nb << 1) */
case ComplexTestsF16::TEST_CMPLX_MAG_F16_7:
nb = 7;
ref.reload(ComplexTestsF16::REF_MAG_F16_ID,mgr,nb);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MAG_F16_8:
nb = 16;
ref.reload(ComplexTestsF16::REF_MAG_F16_ID,mgr,nb);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MAG_F16_9:
nb = 23;
ref.reload(ComplexTestsF16::REF_MAG_F16_ID,mgr,nb);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
/* arm_cmplx_mag_squared_f16: real-valued reference as well */
case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_10:
nb = 7;
ref.reload(ComplexTestsF16::REF_MAG_SQUARED_F16_ID,mgr,nb);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_11:
nb = 16;
ref.reload(ComplexTestsF16::REF_MAG_SQUARED_F16_ID,mgr,nb);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_12:
nb = 23;
ref.reload(ComplexTestsF16::REF_MAG_SQUARED_F16_ID,mgr,nb);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
/* arm_cmplx_mult_cmplx_f16: two complex inputs, complex reference */
case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_13:
nb = 7;
ref.reload(ComplexTestsF16::REF_CMPLX_MULT_CMPLX_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_14:
nb = 16;
ref.reload(ComplexTestsF16::REF_CMPLX_MULT_CMPLX_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_15:
nb = 23;
ref.reload(ComplexTestsF16::REF_CMPLX_MULT_CMPLX_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
/* arm_cmplx_mult_real_f16: input2 is the real vector (INPUT3, nb values) */
case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_16:
nb = 7;
ref.reload(ComplexTestsF16::REF_CMPLX_MULT_REAL_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT3_F16_ID,mgr,nb);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_17:
nb = 16;
ref.reload(ComplexTestsF16::REF_CMPLX_MULT_REAL_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT3_F16_ID,mgr,nb);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_18:
nb = 23;
ref.reload(ComplexTestsF16::REF_CMPLX_MULT_REAL_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT3_F16_ID,mgr,nb);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
/* Remaining cases: one per function, nb left at MAX_NB_SAMPLES */
case ComplexTestsF16::TEST_CMPLX_CONJ_F16_19:
ref.reload(ComplexTestsF16::REF_CONJ_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_20:
ref.reload(ComplexTestsF16::REF_DOT_PROD_LONG_F16_ID,mgr);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MAG_F16_21:
ref.reload(ComplexTestsF16::REF_MAG_F16_ID,mgr,nb);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_22:
ref.reload(ComplexTestsF16::REF_MAG_SQUARED_F16_ID,mgr,nb);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_23:
ref.reload(ComplexTestsF16::REF_CMPLX_MULT_CMPLX_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_24:
ref.reload(ComplexTestsF16::REF_CMPLX_MULT_REAL_F16_ID,mgr,nb << 1);
input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);
input2.reload(ComplexTestsF16::INPUT3_F16_ID,mgr,nb);
output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
break;
}
}
void ComplexTestsF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
{
    /* The test ID plays no role here */
    static_cast<void>(id);

    /* Hand the output pattern over to the pattern manager for dumping */
    output.dump(mgr);
}

@ -77,6 +77,69 @@ group Root {
}
group Complex Tests {
class = ComplexTests
folder = ComplexMaths
suite Complex Tests F16{
class = ComplexTestsF16
folder = ComplexMathsF16
Pattern INPUT1_F16_ID : Input1_f16.txt
Pattern INPUT2_F16_ID : Input2_f16.txt
Pattern INPUT3_F16_ID : Input3_f16.txt
Pattern REF_CONJ_F16_ID : Reference1_f16.txt
Pattern REF_DOT_PROD_3_F16_ID : Reference2_f16.txt
Pattern REF_DOT_PROD_4N_F16_ID : Reference3_f16.txt
Pattern REF_DOT_PROD_4N1_F16_ID : Reference4_f16.txt
Pattern REF_MAG_F16_ID : Reference5_f16.txt
Pattern REF_MAG_SQUARED_F16_ID : Reference6_f16.txt
Pattern REF_CMPLX_MULT_CMPLX_F16_ID : Reference7_f16.txt
Pattern REF_CMPLX_MULT_REAL_F16_ID : Reference8_f16.txt
Pattern REF_DOT_PROD_LONG_F16_ID : Reference9_f16.txt
Output OUT_SAMPLES_F16_ID : Output
Output OUT_STATE_F16_ID : State
Functions {
Test nb=3 arm_cmplx_conj_f16:test_cmplx_conj_f16
Test nb=4n arm_cmplx_conj_f16:test_cmplx_conj_f16
Test nb=4n+1 arm_cmplx_conj_f16:test_cmplx_conj_f16
Test nb=3 arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
Test nb=4n arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
Test nb=4n+1 arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
Test nb=3 arm_cmplx_mag_f16:test_cmplx_mag_f16
Test nb=4n arm_cmplx_mag_f16:test_cmplx_mag_f16
Test nb=4n+1 arm_cmplx_mag_f16:test_cmplx_mag_f16
Test nb=3 arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
Test nb=4n arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
Test nb=4n+1 arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
Test nb=3 arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
Test nb=4n arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
Test nb=4n+1 arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
Test nb=3 arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
Test nb=4n arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
Test nb=4n+1 arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
Test long arm_cmplx_conj_f16:test_cmplx_conj_f16
Test long arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
Test long arm_cmplx_mag_f16:test_cmplx_mag_f16
Test long arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
Test long arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
Test long arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
}
}
}
group Transform Tests {
class = TransformTests
folder = Transform

Loading…
Cancel
Save