CMSIS-DSP: Added complex math f16

pull/19/head
Christophe Favergeon 6 years ago
parent 8b465544a1
commit ebf9104c4e

@ -80,6 +80,8 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
/* newton initial guess */ /* newton initial guess */
#define INVSQRT_MAGIC_F32 0x5f3759df #define INVSQRT_MAGIC_F32 0x5f3759df
#define INV_NEWTON_INIT_F32 0x7EF127EA
#define INVSQRT_NEWTON_MVE_F32(invSqrt, xHalf, xStart)\ #define INVSQRT_NEWTON_MVE_F32(invSqrt, xHalf, xStart)\
{ \ { \
@ -95,6 +97,74 @@ __STATIC_FORCEINLINE float16_t vecAddAcrossF16Mve(float16x8_t in)
} }
#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) */ #endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) */
/***************************************
Definitions available for f16 datatype with HW acceleration only
***************************************/
#if defined (ARM_MATH_MVE_FLOAT16)
/**
 * @brief  Adds together the four complex values packed in an f16 vector.
 * @param[in] vecIn  vector viewed as four interleaved (real, imag) f16 pairs:
 *                   re0 | im0 | re1 | im1 | re2 | im2 | re3 | im3
 * @return vector whose lane 4 holds re0+re1+re2+re3 and whose lane 5 holds
 *         im0+im1+im2+im3; the remaining lanes hold partial sums or
 *         don't-care values.
 */
__STATIC_FORCEINLINE float16x8_t __mve_cmplx_sum_intra_vec_f16(
float16x8_t vecIn)
{
    float16x8_t vecTmp, vecOut;
    /*
     * Carry word for vshlcq: it is shifted into the bottom 32 bits of the
     * vector. Those lanes are don't-care, but the word is read by the
     * intrinsic, so give it a defined value instead of an indeterminate one.
     */
    uint32_t carry = 0U;

    /* swap the two complex values inside each 64-bit half */
    vecTmp = (float16x8_t) vrev64q_s32((int32x4_t) vecIn);
    /*
     * TO TRACK : using the canonical addition (operator +) instead of vaddq
     * leads to inefficient code generation for f16
     */
    /*
     * Compute
     *  re0+re1 | im0+im1 | re0+re1 | im0+im1
     *  re2+re3 | im2+im3 | re2+re3 | im2+im3
     */
    vecTmp = vaddq(vecTmp, vecIn);
    vecOut = vecTmp;
    /*
     * shift the whole vector left by 32 bits, carry word inserted at the bottom
     */
    vecOut = vreinterpretq_f16_s32(vshlcq_s32(vreinterpretq_s32_f16(vecOut) , &carry, 32));
    /*
     * Compute:
     *  DONTCARE        | DONTCARE        | re0+re1+re0+re1 | im0+im1+im0+im1
     *  re0+re1+re2+re3 | im0+im1+im2+im3 | re2+re3+re2+re3 | im2+im3+im2+im3
     */
    vecOut = vaddq(vecOut, vecTmp);
    /*
     * The complex sum is in f16 lanes 4 (real) and 5 (imaginary);
     * the full vector is returned
     */
    return vecOut;
}
/*
 * Sums the four complex values held in `vec` (interleaved real/imag f16
 * pairs) and assigns the real part of the sum to `Re` and the imaginary part
 * to `Im`. `Re` and `Im` must be assignable expressions.
 *
 * The temporary uses a name unlikely to clash with caller variables, so that
 * passing an argument called `vecOut` no longer makes the temporary
 * initialise itself from its own (indeterminate) value.
 */
#define mve_cmplx_sum_intra_r_i_f16(vec, Re, Im)                            \
{                                                                           \
    float16x8_t mveCmplxSumTmp_ = __mve_cmplx_sum_intra_vec_f16(vec);       \
    (Re) = vgetq_lane(mveCmplxSumTmp_, 4);                                  \
    (Im) = vgetq_lane(mveCmplxSumTmp_, 5);                                  \
}
/*
 * Integer constant used to build the first inverse-square-root estimate for
 * f16 values: arm_cmplx_mag_f16 computes INVSQRT_MAGIC_F16 - (bits >> 1) on
 * the raw 16-bit patterns. Its low 10 bits (0x1ba) are the 23-bit mantissa
 * field of the f32 magic number (0x3759df) shifted right by 13.
 */
#define INVSQRT_MAGIC_F16 0x59ba /* ( 0x1ba = 0x3759df >> 13) */
/*
 * NOTE(review): not referenced in the code visible here; presumably the f16
 * counterpart of INV_NEWTON_INIT_F32 (seed for a Newton reciprocal) - confirm.
 */
#define INV_NEWTON_INIT_F16 0x7773
/*
 * One Newton-Raphson refinement step of an inverse square root estimate,
 * applied to all lanes of an f16 vector:
 *
 *     invSqrt = xStart * (1.5 - xHalf * xStart * xStart)
 *
 *   invSqrt : destination (assigned last, so it may be the same variable
 *             as xStart)
 *   xHalf   : vector holding half of the value whose inverse square root
 *             is being refined
 *   xStart  : current estimate; it is expanded three times, so pass a
 *             side-effect-free expression
 *
 * The macro declares a local named `tmp`; arguments must not use that name.
 *
 * The canonical version of INVSQRT_NEWTON_MVE_F16 leads to bad performance,
 * hence the explicit intrinsics (presumably "canonical" means plain C
 * operators on the vector types - confirm).
 */
#define INVSQRT_NEWTON_MVE_F16(invSqrt, xHalf, xStart) \
{ \
float16x8_t tmp; \
\
/* tmp = xhalf * x * x */ \
tmp = vmulq(xStart, xStart); \
tmp = vmulq(tmp, xHalf); \
/* (1.5f - xhalf * x * x) */ \
tmp = vsubq(vdupq_n_f16((float16_t)1.5), tmp); \
/* x = x*(1.5f-xhalf*x*x); */ \
invSqrt = vmulq(tmp, xStart); \
}
#endif
/*************************************** /***************************************
Definitions available for MVEI only Definitions available for MVEI only

@ -36,6 +36,7 @@ extern "C"
#include "dsp/none.h" #include "dsp/none.h"
#include "dsp/utils.h" #include "dsp/utils.h"
#include "dsp/fast_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED) #if defined(ARM_FLOAT16_SUPPORTED)

@ -26,12 +26,94 @@
#ifndef _COMPLEX_MATH_FUNCTIONS_F16_H_ #ifndef _COMPLEX_MATH_FUNCTIONS_F16_H_
#define _COMPLEX_MATH_FUNCTIONS_F16_H_ #define _COMPLEX_MATH_FUNCTIONS_F16_H_
#include "arm_math_types_f16.h"
#include "arm_math_memory.h"
#include "dsp/none.h"
#include "dsp/utils.h"
#include "dsp/fast_math_functions_f16.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" extern "C"
{ {
#endif #endif
#if defined(ARM_FLOAT16_SUPPORTED) #if defined(ARM_FLOAT16_SUPPORTED)
/**
 * @brief Floating-point (f16) complex conjugate.
 *
 * Complex data is stored interleaved (real, imag, real, imag, ...), so each
 * array holds 2*numSamples values.
 *
 * @param[in] pSrc points to the input vector
 * @param[out] pDst points to the output vector
 * @param[in] numSamples number of complex samples in each vector
 */
void arm_cmplx_conj_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t numSamples);
/**
 * @brief Floating-point (f16) complex magnitude squared.
 *
 * The input holds 2*numSamples interleaved values; the output holds
 * numSamples real values.
 *
 * @param[in] pSrc points to the complex input vector
 * @param[out] pDst points to the real output vector
 * @param[in] numSamples number of complex samples in the input vector
 */
void arm_cmplx_mag_squared_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t numSamples);
/**
 * @brief Floating-point (f16) complex magnitude.
 *
 * The input holds 2*numSamples interleaved values; the output holds
 * numSamples real values.
 *
 * @param[in] pSrc points to the complex input vector
 * @param[out] pDst points to the real output vector
 * @param[in] numSamples number of complex samples in the input vector
 */
void arm_cmplx_mag_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t numSamples);
/**
 * @brief Floating-point (f16) complex dot product.
 *
 * The two vectors are multiplied element by element and the products are
 * summed; both inputs hold 2*numSamples interleaved values.
 *
 * @param[in] pSrcA points to the first input vector
 * @param[in] pSrcB points to the second input vector
 * @param[in] numSamples number of complex samples in each vector
 * @param[out] realResult real part of the result returned here
 * @param[out] imagResult imaginary part of the result returned here
 */
void arm_cmplx_dot_prod_f16(
const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t numSamples,
float16_t * realResult,
float16_t * imagResult);
/**
 * @brief Floating-point (f16) complex-by-real multiplication.
 * @param[in] pSrcCmplx points to the complex input vector
 * @param[in] pSrcReal points to the real input vector
 * @param[out] pCmplxDst points to the complex output vector
 * @param[in] numSamples number of samples in each vector
 *
 * NOTE(review): numSamples is presumably the number of complex samples
 * (pSrcReal would then hold numSamples real values) - confirm against the
 * implementation.
 */
void arm_cmplx_mult_real_f16(
const float16_t * pSrcCmplx,
const float16_t * pSrcReal,
float16_t * pCmplxDst,
uint32_t numSamples);
/**
 * @brief Floating-point (f16) complex-by-complex multiplication.
 *
 * All three arrays hold 2*numSamples interleaved (real, imag) values.
 *
 * @param[in] pSrcA points to the first input vector
 * @param[in] pSrcB points to the second input vector
 * @param[out] pDst points to the output vector
 * @param[in] numSamples number of complex samples in each vector
 */
void arm_cmplx_mult_cmplx_f16(
const float16_t * pSrcA,
const float16_t * pSrcB,
float16_t * pDst,
uint32_t numSamples);
#endif /*defined(ARM_FLOAT16_SUPPORTED)*/ #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
#ifdef __cplusplus #ifdef __cplusplus
} }

@ -26,12 +26,46 @@
#ifndef _FAST_MATH_FUNCTIONS_F16_H_ #ifndef _FAST_MATH_FUNCTIONS_F16_H_
#define _FAST_MATH_FUNCTIONS_F16_H_ #define _FAST_MATH_FUNCTIONS_F16_H_
#include "arm_math_types_f16.h"
#include "arm_math_memory.h"
#include "dsp/none.h"
#include "dsp/utils.h"
#include "dsp/fast_math_functions.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" extern "C"
{ {
#endif #endif
#if defined(ARM_FLOAT16_SUPPORTED) #if defined(ARM_FLOAT16_SUPPORTED)
/**
 * @addtogroup SQRT
 * @{
 */
/**
  @brief         Half-precision floating-point square root.
  @param[in]     in    input value
  @param[out]    pOut  receives the square root of the input value
  @return        execution status, forwarded unchanged from arm_sqrt_f32
                   - \ref ARM_MATH_SUCCESS        : input value is positive
                   - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0

  The input is widened to single precision, the square root is taken by
  arm_sqrt_f32, and the result is narrowed back to half precision.
 */
__STATIC_FORCEINLINE arm_status arm_sqrt_f16(
  float16_t in,
  float16_t * pOut)
{
    float32_t root32;                       /* single-precision result */
    arm_status result = arm_sqrt_f32((float32_t) in, &root32);

    *pOut = (float16_t) root32;

    return result;
}
/**
  @} end of SQRT group
 */
#endif /*defined(ARM_FLOAT16_SUPPORTED)*/ #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
#ifdef __cplusplus #ifdef __cplusplus
} }

@ -5,8 +5,6 @@ project(CMSISDSPComplexMath)
include(configLib) include(configLib)
include(configDsp) include(configDsp)
file(GLOB SRC "./*_*.c")
add_library(CMSISDSPComplexMath STATIC) add_library(CMSISDSPComplexMath STATIC)
configLib(CMSISDSPComplexMath ${ROOT}) configLib(CMSISDSPComplexMath ${ROOT})
@ -56,6 +54,14 @@ target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_f32.c)
target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_q15.c) target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_q15.c)
target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_q31.c) target_sources(CMSISDSPComplexMath PRIVATE arm_cmplx_mult_real_q31.c)
# Half-precision (f16) kernels are only compiled when neither ARMAC5 nor
# DISABLEFLOAT16 is set.
if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
target_sources(CMSISDSPComplexMath PRIVATE
  arm_cmplx_conj_f16.c
  arm_cmplx_dot_prod_f16.c
  arm_cmplx_mag_f16.c
  arm_cmplx_mag_squared_f16.c
  arm_cmplx_mult_cmplx_f16.c
  arm_cmplx_mult_real_f16.c
)
endif()
### Includes ### Includes
target_include_directories(CMSISDSPComplexMath PUBLIC "${DSP}/Include") target_include_directories(CMSISDSPComplexMath PUBLIC "${DSP}/Include")

@ -0,0 +1,32 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: ComplexMathFunctionsF16.c
* Description: Combination of all complex math function f16 source files.
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_cmplx_conj_f16.c"
#include "arm_cmplx_dot_prod_f16.c"
#include "arm_cmplx_mag_f16.c"
#include "arm_cmplx_mag_squared_f16.c"
#include "arm_cmplx_mult_cmplx_f16.c"
#include "arm_cmplx_mult_real_f16.c"

@ -0,0 +1,183 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_conj_f16.c
* Description: Floating-point complex conjugate
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_conj Complex Conjugate
Conjugates the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to the destination data where the result should be written.
<code>numSamples</code> specifies the number of complex samples
and the data in each array is stored in an interleaved fashion
(real, imag, real, imag, ...).
Each array has a total of <code>2*numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[(2*n) ] = pSrc[(2*n) ]; // real part
pDst[(2*n)+1] = -pSrc[(2*n)+1]; // imag part
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_conj
@{
*/
/**
@brief Floating-point complex conjugate.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] numSamples number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation.
 * Four complex samples (eight f16 values) are conjugated per vector
 * iteration by multiplying with a (+1, -1, ...) sign pattern; the remaining
 * 0..3 complex samples are handled by a scalar loop.
 */
void arm_cmplx_conj_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
    static const float16_t cmplx_conj_sign[8] = { 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f };
    uint32_t blockSize = numSamples * CMPLX_DIM;   /* total number of f16 values */
    uint32_t blkCnt;                               /* loop counter */
    f16x8_t vecSrc;
    f16x8_t vecSign;

    /*
     * Load the sign vector with a vector load intrinsic. Dereferencing the
     * table through a casted f16x8_t pointer would drop the const qualifier
     * and assume vector alignment for an array that is only guaranteed
     * float16_t alignment.
     */
    vecSign = vld1q(cmplx_conj_sign);

    /* Compute 4 complex samples (8 values) at a time */
    blkCnt = blockSize >> 3U;
    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vmulq(vecSrc, vecSign));
        /*
         * Decrement the blkCnt loop counter
         * Advance vector source and destination pointers
         */
        pSrc += 8;
        pDst += 8;
        blkCnt--;
    }

    /* Tail: (remaining values) / 2 = remaining complex samples */
    blkCnt = (blockSize & 0x7) >> 1;
    while (blkCnt > 0U)
    {
        /* C[0] + jC[1] = A[0]+ j(-1)A[1] */
        /* Calculate Complex Conjugate and store result in destination buffer. */
        *pDst++ = *pSrc++;
        *pDst++ = -*pSrc++;
        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
/*
 * Portable implementation: copies every real part and negates every
 * imaginary part of the interleaved input.
 */
void arm_cmplx_conj_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
    uint32_t remaining;     /* complex samples still to be processed */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t quads;         /* groups of four complex samples */

    /* Unrolled section: four complex samples (eight values) per pass */
    for (quads = numSamples >> 2U; quads > 0U; quads--)
    {
        /* C[0] + jC[1] = A[0] + j(-1)A[1] */
        pDst[0] =  pSrc[0];
        pDst[1] = -pSrc[1];
        pDst[2] =  pSrc[2];
        pDst[3] = -pSrc[3];
        pDst[4] =  pSrc[4];
        pDst[5] = -pSrc[5];
        pDst[6] =  pSrc[6];
        pDst[7] = -pSrc[7];

        pSrc += 8;
        pDst += 8;
    }

    /* Samples left over after the unrolled section */
    remaining = numSamples & 0x3U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C[0] + jC[1] = A[0] + j(-1)A[1] */
        pDst[0] =  pSrc[0];
        pDst[1] = -pSrc[1];

        pSrc += 2;
        pDst += 2;
    }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of cmplx_conj group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,236 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_dot_prod_f16.c
* Description: Floating-point complex dot product
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_dot_prod Complex Dot Product
Computes the dot product of two complex vectors.
The vectors are multiplied element-by-element and then summed.
The <code>pSrcA</code> points to the first complex input vector and
<code>pSrcB</code> points to the second complex input vector.
<code>numSamples</code> specifies the number of complex samples
and the data in each array is stored in an interleaved fashion
(real, imag, real, imag, ...).
Each array has a total of <code>2*numSamples</code> values.
The underlying algorithm is used:
<pre>
realResult = 0;
imagResult = 0;
for (n = 0; n < numSamples; n++) {
realResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
imagResult += pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_dot_prod
@{
*/
/**
@brief Floating-point complex dot product.
@param[in] pSrcA points to the first input vector
@param[in] pSrcB points to the second input vector
@param[in] numSamples number of samples in each vector
@param[out] realResult real part of the result returned here
@param[out] imagResult imaginary part of the result returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation.
 * Four complex products are accumulated per vector iteration into a vector
 * of four partial complex sums, which is reduced to a single complex value
 * at the end. The remaining values are processed with predicated loads so
 * that no element past the end of the inputs is read.
 */
void arm_cmplx_dot_prod_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
        uint32_t numSamples,
        float16_t * realResult,
        float16_t * imagResult)
{
    uint32_t blockSize = numSamples * CMPLX_DIM;   /* total number of f16 values */
    uint32_t blkCnt;                               /* loop counter */
    float16_t real_sum, imag_sum;
    f16x8_t vecSrcA, vecSrcB;
    f16x8_t vec_acc = vdupq_n_f16(0.0f);

    /* Compute 4 complex samples (8 values) at a time */
    blkCnt = blockSize >> 3U;
    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);
        /* rotation 0 and rotation 90 together form the full complex product */
        vec_acc = vcmlaq(vec_acc, vecSrcA, vecSrcB);
        vec_acc = vcmlaq_rot90(vec_acc, vecSrcA, vecSrcB);
        /*
         * Decrement the blkCnt loop counter
         * Advance vector source pointers
         */
        pSrcA += 8;
        pSrcB += 8;
        blkCnt--;
    }

    /* Tail: number of f16 values left (0, 2, 4 or 6) */
    blkCnt = (blockSize & 7);
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /*
         * Zeroing predicated loads: only the lanes enabled by p0 are read
         * from memory, so the loads stay inside the input buffers.
         */
        vecSrcA = vld1q_z(pSrcA, p0);
        vecSrcB = vld1q_z(pSrcB, p0);
        vec_acc = vcmlaq_m(vec_acc, vecSrcA, vecSrcB, p0);
        vec_acc = vcmlaq_rot90_m(vec_acc, vecSrcA, vecSrcB, p0);
    }

    /* Sum the partial parts */
    mve_cmplx_sum_intra_r_i_f16(vec_acc, real_sum, imag_sum);

    /*
     * Store the real and imaginary results in the destination buffers
     */
    *realResult = real_sum;
    *imagResult = imag_sum;
}
#else
/*
 * Portable implementation: accumulates the complex products
 * (aRe + j aIm) * (bRe + j bIm) of all sample pairs in two f16 accumulators.
 */
void arm_cmplx_dot_prod_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
        uint32_t numSamples,
        float16_t * realResult,
        float16_t * imagResult)
{
    float16_t accRe = 0.0f, accIm = 0.0f;   /* running real / imaginary sums */
    float16_t aRe, aIm, bRe, bIm;           /* current sample of A and of B */
    uint32_t remaining;                     /* complex samples still to be processed */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t quads;                         /* groups of four complex samples */

    /* Unrolled section: four complex products per pass */
    for (quads = numSamples >> 2U; quads > 0U; quads--)
    {
        aRe = pSrcA[0];
        aIm = pSrcA[1];
        bRe = pSrcB[0];
        bIm = pSrcB[1];
        accRe += aRe * bRe;
        accIm += aRe * bIm;
        accRe -= aIm * bIm;
        accIm += aIm * bRe;

        aRe = pSrcA[2];
        aIm = pSrcA[3];
        bRe = pSrcB[2];
        bIm = pSrcB[3];
        accRe += aRe * bRe;
        accIm += aRe * bIm;
        accRe -= aIm * bIm;
        accIm += aIm * bRe;

        aRe = pSrcA[4];
        aIm = pSrcA[5];
        bRe = pSrcB[4];
        bIm = pSrcB[5];
        accRe += aRe * bRe;
        accIm += aRe * bIm;
        accRe -= aIm * bIm;
        accIm += aIm * bRe;

        aRe = pSrcA[6];
        aIm = pSrcA[7];
        bRe = pSrcB[6];
        bIm = pSrcB[7];
        accRe += aRe * bRe;
        accIm += aRe * bIm;
        accRe -= aIm * bIm;
        accIm += aIm * bRe;

        pSrcA += 8;
        pSrcB += 8;
    }

    /* Samples left over after the unrolled section */
    remaining = numSamples & 0x3U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        aRe = pSrcA[0];
        aIm = pSrcA[1];
        bRe = pSrcB[0];
        bIm = pSrcB[1];
        accRe += aRe * bRe;
        accIm += aRe * bIm;
        accRe -= aIm * bIm;
        accIm += aIm * bRe;

        pSrcA += 2;
        pSrcB += 2;
    }

    /* Hand the accumulated sums back to the caller */
    *realResult = accRe;
    *imagResult = accIm;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of cmplx_dot_prod group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,239 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_f16.c
* Description: Floating-point complex magnitude
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_mag Complex Magnitude
Computes the magnitude of the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to where the result should be written.
<code>numSamples</code> specifies the number of complex samples
in the input array and the data is stored in an interleaved fashion
(real, imag, real, imag, ...).
The input array has a total of <code>2*numSamples</code> values;
the output array has a total of <code>numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_mag
@{
*/
/**
@brief Floating-point complex magnitude.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation.
 * Eight complex samples are processed per vector iteration with an inlined
 * fast square root (inverse square root estimate refined by Newton-Raphson
 * steps). The remaining 0..7 samples are processed one at a time with
 * arm_sqrt_f16: the de-interleaving load always reads 16 values, so using it
 * for a partial block would read past the end of the input.
 */
void arm_cmplx_mag_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
    uint32_t blkCnt;            /* loop counter */
    f16x8x2_t vecSrc;           /* de-interleaving load: val[0] = real parts, val[1] = imaginary parts */
    f16x8_t sum;
    float16_t real, imag;       /* tail: one complex sample */

    /* Compute 8 complex samples at a time */
    blkCnt = numSamples >> 3U;
    while (blkCnt > 0U)
    {
        q15x8_t newtonStartVec;
        f16x8_t sumHalf, invSqrt;

        vecSrc = vld2q(pSrc);
        pSrc += 16;
        /* sum = re^2 + im^2 */
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        /*
         * inlined Fast SQRT using inverse SQRT newton-raphson method
         */
        /* compute initial value from the raw bit patterns */
        newtonStartVec = vdupq_n_s16(INVSQRT_MAGIC_F16) - vshrq((q15x8_t) sum, 1);
        sumHalf = vmulq(sum, (float16_t)0.5);
        /*
         * compute 3 x iterations
         *
         * The more iterations, the more accuracy.
         * If you need to trade a bit of accuracy for more performance,
         * you can comment out the 3rd use of the macro.
         *
         * NOTE(review): for sum == 0 the start value is about 183 and grows
         * by a factor 1.5 per step; its square then appears to exceed the
         * f16 range on the second step, which would give inf * 0 = NaN.
         * Verify the result for zero-magnitude inputs on target.
         */
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, (f16x8_t) newtonStartVec);
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
        INVSQRT_NEWTON_MVE_F16(invSqrt, sumHalf, invSqrt);
        /*
         * set negative values to 0
         */
        invSqrt = vdupq_m(invSqrt, (float16_t)0.0f, vcmpltq(invSqrt, (float16_t)0.0f));
        /*
         * sqrt(x) = x * invSqrt(x)
         */
        sum = vmulq(sum, invSqrt);
        vstrhq_f16(pDst, sum);
        pDst += 8;
        /*
         * Decrement the loop counter
         */
        blkCnt--;
    }

    /*
     * tail: remaining complex samples, one at a time
     */
    blkCnt = numSamples & 7U;
    while (blkCnt > 0U)
    {
        /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
        real = *pSrc++;
        imag = *pSrc++;
        /* store result in destination buffer. */
        arm_sqrt_f16((real * real) + (imag * imag), pDst++);
        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
/*
 * Portable implementation: for every complex sample, the sum of the squared
 * real and imaginary parts is passed to arm_sqrt_f16, which writes the
 * result into the output array.
 */
void arm_cmplx_mag_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
    float16_t re, im;       /* real and imaginary part of the current sample */
    uint32_t remaining;     /* complex samples still to be processed */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t quads;         /* groups of four complex samples */

    /* Unrolled section: four magnitudes per pass */
    for (quads = numSamples >> 2U; quads > 0U; quads--)
    {
        /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
        re = pSrc[0];
        im = pSrc[1];
        arm_sqrt_f16((re * re) + (im * im), pDst++);

        re = pSrc[2];
        im = pSrc[3];
        arm_sqrt_f16((re * re) + (im * im), pDst++);

        re = pSrc[4];
        im = pSrc[5];
        arm_sqrt_f16((re * re) + (im * im), pDst++);

        re = pSrc[6];
        im = pSrc[7];
        arm_sqrt_f16((re * re) + (im * im), pDst++);

        pSrc += 8;
    }

    /* Samples left over after the unrolled section */
    remaining = numSamples & 0x3U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
        re = pSrc[0];
        im = pSrc[1];
        arm_sqrt_f16((re * re) + (im * im), pDst++);

        pSrc += 2;
    }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of cmplx_mag group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,172 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mag_squared_f16.c
* Description: Floating-point complex magnitude squared
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup cmplx_mag_squared Complex Magnitude Squared
Computes the magnitude squared of the elements of a complex data vector.
The <code>pSrc</code> points to the source data and
<code>pDst</code> points to where the result should be written.
<code>numSamples</code> specifies the number of complex samples
in the input array and the data is stored in an interleaved fashion
(real, imag, real, imag, ...).
The input array has a total of <code>2*numSamples</code> values;
the output array has a total of <code>numSamples</code> values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup cmplx_mag_squared
@{
*/
/**
@brief Floating-point complex magnitude squared.
@param[in] pSrc points to input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation.
 * Eight complex samples are processed per vector iteration. The remaining
 * 0..7 samples are processed by a scalar loop: the de-interleaving load
 * always reads 16 values, so using it for a partial block would read past
 * the end of the input.
 */
void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
    uint32_t blkCnt;            /* loop counter */
    f16x8x2_t vecSrc;           /* de-interleaving load: val[0] = real parts, val[1] = imaginary parts */
    f16x8_t sum;
    float16_t real, imag;       /* tail: one complex sample */

    /* Compute 8 complex samples at a time */
    blkCnt = numSamples >> 3U;
    while (blkCnt > 0U)
    {
        vecSrc = vld2q(pSrc);
        /* sum = re^2 + im^2 */
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        vstrhq_f16(pDst, sum);

        pSrc += 16;
        pDst += 8;
        /*
         * Decrement the loop counter
         */
        blkCnt--;
    }

    /* Tail: remaining complex samples, one at a time */
    blkCnt = numSamples & 7U;
    while (blkCnt > 0U)
    {
        /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
        real = *pSrc++;
        imag = *pSrc++;
        /* store result in destination buffer. */
        *pDst++ = (real * real) + (imag * imag);
        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
/*
 * Portable implementation: writes re^2 + im^2 for every complex sample of
 * the interleaved input.
 */
void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
    float16_t re, im;       /* real and imaginary part of the current sample */
    uint32_t remaining;     /* complex samples still to be processed */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t quads;         /* groups of four complex samples */

    /* Unrolled section: four outputs per pass */
    for (quads = numSamples >> 2U; quads > 0U; quads--)
    {
        /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
        re = pSrc[0];
        im = pSrc[1];
        pDst[0] = (re * re) + (im * im);

        re = pSrc[2];
        im = pSrc[3];
        pDst[1] = (re * re) + (im * im);

        re = pSrc[4];
        im = pSrc[5];
        pDst[2] = (re * re) + (im * im);

        re = pSrc[6];
        im = pSrc[7];
        pDst[3] = (re * re) + (im * im);

        pSrc += 8;
        pDst += 4;
    }

    /* Samples left over after the unrolled section */
    remaining = numSamples & 0x3U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
        re = pSrc[0];
        im = pSrc[1];
        pDst[0] = (re * re) + (im * im);

        pSrc += 2;
        pDst += 1;
    }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of cmplx_mag_squared group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,217 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_cmplx_f16.c
* Description: Floating-point complex-by-complex multiplication
*
* $Date: 18. March 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
Multiplies a complex vector by another complex vector and generates a complex result.
The data in the complex arrays is stored in an interleaved fashion
(real, imag, real, imag, ...).
The parameter <code>numSamples</code> represents the number of complex
samples processed. The complex arrays have a total of <code>2*numSamples</code>
real values.
The underlying algorithm is used:
<pre>
for (n = 0; n < numSamples; n++) {
pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup CmplxByCmplxMult
@{
*/
/**
@brief Floating-point complex-by-complex multiplication.
@param[in] pSrcA points to first input vector
@param[in] pSrcB points to second input vector
@param[out] pDst points to output vector
@param[in] numSamples number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation.
 * Four complex products (eight f16 values) are computed per vector
 * iteration; the remaining 0..3 complex samples are handled by a scalar
 * loop.
 */
void arm_cmplx_mult_cmplx_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
        float16_t * pDst,
        uint32_t numSamples)
{
    uint32_t blkCnt;            /* loop counter */
    f16x8_t vecA;
    f16x8_t vecB;
    f16x8_t vecDst;
    float16_t a, b, c, d;       /* Temporary variables to store real and imaginary values */

    /* Compute 4 complex samples (8 values) at a time */
    blkCnt = numSamples >> 2U;
    while (blkCnt > 0U)
    {
        vecA = vldrhq_f16(pSrcA);
        vecB = vldrhq_f16(pSrcB);
        /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
        vecDst = vcmulq(vecA, vecB);
        /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
        vecDst = vcmlaq_rot90(vecDst, vecA, vecB);
        vstrhq_f16(pDst, vecDst);

        blkCnt--;
        pSrcA += 8;
        pSrcB += 8;
        pDst += 8;
    }

    /*
     * Tail: numSamples counts complex samples and the vector loop consumes
     * four of them per iteration, so exactly (numSamples mod 4) samples are
     * left. Deriving the count from (numSamples & 7) >> 1 would drop samples
     * for some lengths and run past the buffers for others.
     */
    blkCnt = numSamples & 3U;
    while (blkCnt > 0U)
    {
        /* C[2 * i ] = A[2 * i] * B[2 * i ] - A[2 * i + 1] * B[2 * i + 1]. */
        /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i ]. */
        a = *pSrcA++;
        b = *pSrcA++;
        c = *pSrcB++;
        d = *pSrcB++;
        /* store result in destination buffer. */
        *pDst++ = (a * c) - (b * d);
        *pDst++ = (a * d) + (b * c);
        /* Decrement loop counter */
        blkCnt--;
    }
}
#else
/*
 * Portable implementation: element-wise complex product
 * (aRe + j aIm) * (bRe + j bIm) of two interleaved complex vectors.
 */
void arm_cmplx_mult_cmplx_f16(
  const float16_t * pSrcA,
  const float16_t * pSrcB,
        float16_t * pDst,
        uint32_t numSamples)
{
    float16_t aRe, aIm, bRe, bIm;   /* current sample of A and of B */
    uint32_t remaining;             /* complex samples still to be processed */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t quads;                 /* groups of four complex samples */

    /* Unrolled section: four complex products per pass */
    for (quads = numSamples >> 2U; quads > 0U; quads--)
    {
        /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
        /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
        aRe = pSrcA[0];
        aIm = pSrcA[1];
        bRe = pSrcB[0];
        bIm = pSrcB[1];
        pDst[0] = (aRe * bRe) - (aIm * bIm);
        pDst[1] = (aRe * bIm) + (aIm * bRe);

        aRe = pSrcA[2];
        aIm = pSrcA[3];
        bRe = pSrcB[2];
        bIm = pSrcB[3];
        pDst[2] = (aRe * bRe) - (aIm * bIm);
        pDst[3] = (aRe * bIm) + (aIm * bRe);

        aRe = pSrcA[4];
        aIm = pSrcA[5];
        bRe = pSrcB[4];
        bIm = pSrcB[5];
        pDst[4] = (aRe * bRe) - (aIm * bIm);
        pDst[5] = (aRe * bIm) + (aIm * bRe);

        aRe = pSrcA[6];
        aIm = pSrcA[7];
        bRe = pSrcB[6];
        bIm = pSrcB[7];
        pDst[6] = (aRe * bRe) - (aIm * bIm);
        pDst[7] = (aRe * bIm) + (aIm * bRe);

        pSrcA += 8;
        pSrcB += 8;
        pDst += 8;
    }

    /* Samples left over after the unrolled section */
    remaining = numSamples & 0x3U;
#else
    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining > 0U; remaining--)
    {
        /* C[2 * i    ] = A[2 * i] * B[2 * i    ] - A[2 * i + 1] * B[2 * i + 1]. */
        /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i    ]. */
        aRe = pSrcA[0];
        aIm = pSrcA[1];
        bRe = pSrcB[0];
        bIm = pSrcB[1];
        pDst[0] = (aRe * bRe) - (aIm * bIm);
        pDst[1] = (aRe * bIm) + (aIm * bRe);

        pSrcA += 2;
        pSrcB += 2;
        pDst += 2;
    }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of CmplxByCmplxMult group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -0,0 +1,192 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cmplx_mult_real_f16.c
* Description: Floating-point complex by real multiplication
*
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/complex_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupCmplxMath
*/
/**
@defgroup CmplxByRealMult Complex-by-Real Multiplication
Multiplies a complex vector by a real vector and generates a complex result.
The data in the complex arrays is stored in an interleaved fashion
(real, imag, real, imag, ...).
The parameter <code>numSamples</code> represents the number of complex
samples processed. The complex arrays have a total of <code>2*numSamples</code>
real values while the real array has a total of <code>numSamples</code>
real values.
The underlying algorithm is:
<pre>
for (n = 0; n < numSamples; n++) {
pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
}
</pre>
There are separate functions for floating-point, Q15, and Q31 data types.
*/
/**
@addtogroup CmplxByRealMult
@{
*/
/**
@brief Floating-point complex-by-real multiplication.
@param[in] pSrcCmplx points to complex input vector
@param[in] pSrcReal points to real input vector
@param[out] pCmplxDst points to complex output vector
@param[in] numSamples number of samples in each vector
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Helium (MVE) implementation of the f16 complex-by-real product.
 *
 * pSrcCmplx / pCmplxDst hold interleaved (real, imag) half-precision values
 * (2 * numSamples values each); pSrcReal holds numSamples real values.
 * Each complex sample is scaled by the real value with the same index.
 *
 * One vector carries 8 half-precision lanes = 4 complex samples, which need
 * only 4 real values; the gather load with offsets {0,0,1,1,2,2,3,3}
 * duplicates every real value into the two lanes of its complex sample.
 */
void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
{
    /* Element offsets used by the gather load: real[k] feeds lanes 2k and 2k+1. */
    static const uint16_t stride_cmplx_x_real_16[8] = {
        0, 0, 1, 1, 2, 2, 3, 3
    };
    uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* Total number of f16 values in the complex buffers */
    uint32_t blkCnt;                                /* Loop counter */
    f16x8_t rVec;
    f16x8_t cmplxVec;
    f16x8_t dstVec;
    uint16x8_t strideVec;

    /* stride vector for pairs of real generation */
    strideVec = vld1q(stride_cmplx_x_real_16);

    /* Compute 4 complex outputs at a time */
    blkCnt = blockSizeC >> 3;
    while (blkCnt > 0U)
    {
        cmplxVec = vld1q(pSrcCmplx);
        rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec);
        dstVec = vmulq(cmplxVec, rVec);
        vst1q(pCmplxDst, dstVec);

        pSrcReal += 4;      /* 4 real values consumed ...            */
        pSrcCmplx += 8;     /* ... for 4 complex samples (8 values). */
        pCmplxDst += 8;
        blkCnt--;
    }

    /* Tail: 2, 4 or 6 remaining f16 values (1..3 complex samples). */
    blkCnt = blockSizeC & 7;
    if (blkCnt > 0U) {
        /* Predicate enabling only the first blkCnt lanes. */
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicate the loads as well as the store: inactive lanes are
           zero-filled instead of being fetched from beyond the end of
           pSrcCmplx / pSrcReal. */
        cmplxVec = vldrhq_z_f16(pSrcCmplx, p0);
        rVec = vldrhq_gather_shifted_offset_z_f16(pSrcReal, strideVec, p0);
        dstVec = vmulq(cmplxVec, rVec);
        vstrhq_p_f16(pCmplxDst, dstVec, p0);
    }
}
#else
/*
 * Portable (non-MVE) f16 complex-by-real product.
 *
 * For every sample i in [0, numSamples):
 *   pCmplxDst[2i]     = pSrcCmplx[2i]     * pSrcReal[i]
 *   pCmplxDst[2i + 1] = pSrcCmplx[2i + 1] * pSrcReal[i]
 *
 * When ARM_MATH_LOOPUNROLL is enabled (and auto-vectorization is not), the
 * bulk of the work is done four complex samples per pass.
 */
void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
{
    uint32_t remaining;     /* Complex samples still to be produced */
    float16_t scale;        /* Real factor applied to the current complex sample */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

    /* Unrolled section: each pass scales 4 complex samples by 4 real values. */
    for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
    {
        scale = pSrcReal[0];
        pCmplxDst[0] = pSrcCmplx[0] * scale;
        pCmplxDst[1] = pSrcCmplx[1] * scale;

        scale = pSrcReal[1];
        pCmplxDst[2] = pSrcCmplx[2] * scale;
        pCmplxDst[3] = pSrcCmplx[3] * scale;

        scale = pSrcReal[2];
        pCmplxDst[4] = pSrcCmplx[4] * scale;
        pCmplxDst[5] = pSrcCmplx[5] * scale;

        scale = pSrcReal[3];
        pCmplxDst[6] = pSrcCmplx[6] * scale;
        pCmplxDst[7] = pSrcCmplx[7] * scale;

        /* Step past the 4 real values and 4 complex samples just handled. */
        pSrcReal += 4;
        pSrcCmplx += 8;
        pCmplxDst += 8;
    }

    /* Whatever the unrolled section left over (0..3 samples). */
    remaining = numSamples % 0x4U;

#else

    /* No unrolling: the generic loop below handles every sample. */
    remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    /* Generic loop: one complex sample per pass. */
    for (; remaining > 0U; remaining--)
    {
        scale = pSrcReal[0];
        pCmplxDst[0] = pSrcCmplx[0] * scale;
        pCmplxDst[1] = pSrcCmplx[1] * scale;

        pSrcReal += 1;
        pSrcCmplx += 2;
        pCmplxDst += 2;
    }
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of CmplxByRealMult group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

@ -327,6 +327,7 @@ set(TESTSRC
if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL))) if ((NOT ARMAC5) AND (FLOAT16TESTS) AND ((FLOAT16) OR (MVEF) OR (HELIUM) OR (NEON) OR (NEONEXPERIMENTAL)))
set(TESTSRC16 set(TESTSRC16
Source/Tests/BasicTestsF16.cpp Source/Tests/BasicTestsF16.cpp
Source/Tests/ComplexTestsF16.cpp
Source/Tests/TransformCF16.cpp Source/Tests/TransformCF16.cpp
Source/Tests/TransformRF16.cpp Source/Tests/TransformRF16.cpp
) )

@ -0,0 +1,21 @@
#include "Test.h"
#include "Pattern.h"
#include "dsp/complex_math_functions_f16.h"
// Test suite for the half-precision (f16) complex math functions
// (conjugate, dot product, magnitude, squared magnitude, complex-by-complex
// and complex-by-real multiplication).
class ComplexTestsF16:public Client::Suite
{
public:
ComplexTestsF16(Testing::testID_t id);
// Loads the input/reference patterns and creates the output buffer needed
// by the test identified by the first argument.
virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr);
// Dumps the output pattern once the test has run.
virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
private:
// NOTE(review): presumably a generated header providing the test and pattern
// identifiers (TEST_*, INPUT*_F16_ID, REF_*_F16_ID, OUT_*_F16_ID) and the
// test_* method declarations used by the implementation file - confirm.
#include "ComplexTestsF16_decl.h"
// First input vector (interleaved complex samples in every test).
Client::Pattern<float16_t> input1;
// Second input vector: complex for the dot product and complex-by-complex
// tests, real for the complex-by-real tests.
Client::Pattern<float16_t> input2;
// Buffer receiving the result of the function under test.
Client::LocalPattern<float16_t> output;
// Reference patterns are not loaded when we are in dump mode
Client::RefPattern<float16_t> ref;
};

@ -105,11 +105,13 @@ def generatePatterns():
PARAMDIR = os.path.join("Parameters","DSP","ComplexMaths","ComplexMaths") PARAMDIR = os.path.join("Parameters","DSP","ComplexMaths","ComplexMaths")
configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32") configf32=Tools.Config(PATTERNDIR,PARAMDIR,"f32")
configf16=Tools.Config(PATTERNDIR,PARAMDIR,"f16")
configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31") configq31=Tools.Config(PATTERNDIR,PARAMDIR,"q31")
configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15") configq15=Tools.Config(PATTERNDIR,PARAMDIR,"q15")
writeTests(configf32,0) writeTests(configf32,0)
writeTests(configf16,16)
writeTests(configq31,31) writeTests(configq31,31)
writeTests(configq15,15) writeTests(configq15,15)

@ -0,0 +1,514 @@
H
256
// 0.606399
0x38da
// 0.073125
0x2cae
// -0.140834
0xb082
// -0.900755
0xbb35
// 0.148560
0x30c1
// -0.261878
0xb431
// -0.545170
0xb85d
// -0.628326
0xb907
// 0.495511
0x37ee
// 0.063174
0x2c0b
// -0.169679
0xb16e
// 0.145953
0x30ac
// 0.496673
0x37f2
// 1.000000
0x3c00
// 0.131807
0x3038
// 0.036478
0x28ab
// 0.427452
0x36d7
// -0.526852
0xb837
// -0.153413
0xb0e9
// -0.180253
0xb1c5
// 0.192358
0x3228
// 0.534224
0x3846
// -0.143569
0xb098
// -0.378102
0xb60d
// -0.387182
0xb632
// -0.181926
0xb1d2
// 0.627082
0x3904
// -0.782546
0xba43
// 0.227872
0x334b
// -0.920057
0xbb5c
// 0.028790
0x275f
// -0.002111
0x9853
// -0.448033
0xb72b
// 0.259638
0x3427
// -0.284893
0xb48f
// -0.172468
0xb185
// 0.304282
0x34de
// 0.130491
0x302d
// -0.206776
0xb29e
// 0.384068
0x3625
// 0.208473
0x32ac
// -0.097562
0xae3e
// 0.121821
0x2fcc
// -0.611990
0xb8e5
// -0.062335
0xabfb
// 0.614710
0x38eb
// 0.438926
0x3706
// 0.195912
0x3245
// 0.081082
0x2d30
// 0.012720
0x2283
// 0.258657
0x3423
// -0.268801
0xb44d
// -0.183050
0xb1dc
// 0.087615
0x2d9b
// -0.427434
0xb6d7
// 0.301905
0x34d5
// -0.236141
0xb38e
// 0.188230
0x3206
// -0.089373
0xadb8
// -0.294443
0xb4b6
// 0.506253
0x380d
// 0.841140
0x3abb
// 0.104046
0x2ea9
// 0.285712
0x3492
// -0.725796
0xb9ce
// 0.649891
0x3933
// 0.663543
0x394f
// -0.062934
0xac07
// -0.387710
0xb634
// 0.531890
0x3841
// 0.277675
0x3471
// 0.319026
0x351b
// 0.072518
0x2ca4
// 0.637432
0x3919
// -0.316837
0xb512
// 0.031885
0x2815
// -0.109017
0xaefa
// -0.531561
0xb841
// -0.116513
0xaf75
// 0.005209
0x1d56
// -0.178215
0xb1b4
// 0.096452
0x2e2c
// -0.089155
0xadb5
// -0.193966
0xb235
// -0.318435
0xb518
// -0.137340
0xb065
// 0.325371
0x3535
// -0.413025
0xb69c
// -0.007530
0x9fb6
// -0.011499
0xa1e3
// -0.359144
0xb5bf
// -0.317911
0xb516
// 0.018568
0x24c1
// -0.366742
0xb5de
// 0.221775
0x3319
// 0.420017
0x36b8
// -0.040939
0xa93d
// -0.362740
0xb5ce
// 0.471219
0x378a
// -0.099621
0xae60
// 0.326052
0x3538
// 0.311047
0x34fa
// 0.156421
0x3101
// -0.068640
0xac65
// -0.095922
0xae24
// -0.013959
0xa326
// 0.607825
0x38dd
// 0.093427
0x2dfb
// 0.329927
0x3547
// 0.084851
0x2d6e
// 0.726006
0x39cf
// 0.248801
0x33f6
// 0.620044
0x38f6
// 0.164699
0x3145
// 0.045769
0x29dc
// 0.264445
0x343b
// -0.238087
0xb39e
// 0.220883
0x3311
// 0.018551
0x24c0
// -0.057134
0xab50
// -0.155357
0xb0f9
// 0.038525
0x28ee
// 0.208492
0x32ac
// -0.098819
0xae53
// 0.027140
0x26f3
// -0.333135
0xb555
// -0.423594
0xb6c7
// 0.521231
0x382b
// 0.406043
0x367f
// -0.457325
0xb751
// 0.089960
0x2dc2
// -0.107212
0xaedd
// 0.089652
0x2dbd
// -0.269460
0xb450
// 0.155036
0x30f6
// 0.024048
0x2628
// 0.220735
0x3310
// 0.032031
0x281a
// -0.567049
0xb889
// 0.145897
0x30ab
// -0.094783
0xae11
// 0.319032
0x351b
// -0.091891
0xade2
// 0.416962
0x36ac
// 0.093970
0x2e04
// 0.564895
0x3885
// -0.296964
0xb4c0
// -0.209322
0xb2b3
// 0.265009
0x343d
// 0.093215
0x2df7
// 0.622832
0x38fc
// -0.085788
0xad7e
// 0.670554
0x395d
// 0.032468
0x2828
// 0.118023
0x2f8e
// -0.269207
0xb44f
// 0.217617
0x32f7
// 0.213691
0x32d7
// 0.439040
0x3706
// 0.241885
0x33be
// -0.424515
0xb6cb
// 0.352380
0x35a3
// 0.588583
0x38b5
// -0.264797
0xb43d
// 0.329184
0x3544
// 0.034001
0x285a
// -0.423064
0xb6c5
// -0.608316
0xb8de
// -0.338928
0xb56c
// 0.419995
0x36b8
// 0.200555
0x326b
// 0.329638
0x3546
// -0.294240
0xb4b5
// -0.897858
0xbb2f
// 0.160219
0x3121
// 0.131756
0x3037
// 0.206411
0x329b
// 0.109237
0x2efe
// -0.367268
0xb5e0
// 0.292430
0x34ae
// -0.414400
0xb6a1
// -0.642448
0xb924
// 0.238399
0x33a1
// 0.090387
0x2dc9
// -0.512754
0xb81a
// 0.301373
0x34d2
// -0.466867
0xb778
// 0.204287
0x328a
// -0.229499
0xb358
// -0.119896
0xafac
// 0.440248
0x370b
// 0.649995
0x3933
// 0.129477
0x3025
// 0.241037
0x33b7
// -0.411964
0xb697
// 0.228133
0x334d
// 0.942283
0x3b8a
// -0.390976
0xb641
// 0.182779
0x31d9
// 0.228995
0x3354
// 0.126382
0x300b
// 0.225140
0x3334
// -0.214251
0xb2db
// 0.439711
0x3709
// -0.638072
0xb91b
// -0.667301
0xb957
// -0.353387
0xb5a7
// 0.329438
0x3545
// -0.543036
0xb858
// -0.195706
0xb243
// -0.000314
0x8d26
// -0.346311
0xb58a
// -0.040030
0xa920
// 0.309919
0x34f5
// 0.214685
0x32df
// -0.256227
0xb41a
// 0.256241
0x341a
// 0.423187
0x36c5
// -0.070894
0xac8a
// -0.408192
0xb688
// 0.258732
0x3424
// 0.743039
0x39f2
// -0.328534
0xb542
// -0.502412
0xb805
// -0.550943
0xb868
// 0.461636
0x3763
// -0.098335
0xae4b
// -0.331961
0xb550
// 0.502005
0x3804
// -0.060550
0xabc0
// -0.218616
0xb2ff
// 0.206607
0x329d
// 0.509390
0x3813
// 0.331278
0x354d
// -0.143708
0xb099
// 0.008236
0x2038
// -0.256486
0xb41b
// -0.154828
0xb0f4
// -0.606731
0xb8db
// 0.043363
0x298d
// 0.416313
0x36a9
// 0.132691
0x303f
// 0.716789
0x39bc
// 0.827380
0x3a9e
// 0.109746
0x2f06
// 0.480993
0x37b2
// -0.424777
0xb6cc
// -0.169704
0xb16e
// -0.095902
0xae23
// 0.022081
0x25a7
// -0.227175
0xb345
// 0.382023
0x361d
// 0.316215
0x350f
// -0.027787
0xa71d
// 0.107868
0x2ee7
// -0.091834
0xade1

@ -0,0 +1,6 @@
H
2
// -0.584459
0xb8ad
// 0.027514
0x270b

@ -0,0 +1,6 @@
H
2
// -1.063155
0xbc41
// 0.204536
0x328c

@ -0,0 +1,6 @@
H
2
// -2.020148
0xc00a
// -0.083691
0xad5b

@ -0,0 +1,514 @@
H
256
// 0.211657
0x32c6
// 0.164180
0x3141
// 0.394092
0x364e
// 0.837031
0x3ab2
// 0.203042
0x327f
// 0.594290
0x38c1
// 0.425438
0x36cf
// 0.692208
0x398a
// 0.478904
0x37aa
// 0.731179
0x39d9
// 0.228972
0x3354
// 0.423292
0x36c6
// 0.687425
0x3980
// 0.637676
0x391a
// 0.400567
0x3669
// 0.284900
0x348f
// 0.800826
0x3a68
// 0.530918
0x383f
// 0.899030
0x3b31
// 0.067538
0x2c53
// 0.418694
0x36b3
// 0.422016
0x36c1
// 0.600117
0x38cd
// 0.096654
0x2e30
// 0.382849
0x3620
// 0.553403
0x386d
// 0.109789
0x2f07
// 0.740600
0x39ed
// 0.830944
0x3aa6
// 0.426594
0x36d3
// 0.281352
0x3480
// 0.270642
0x3455
// 0.354207
0x35ab
// 1.046783
0x3c30
// 0.937881
0x3b81
// 0.476101
0x379e
// 0.369946
0x35eb
// 0.334837
0x355b
// 0.319491
0x351d
// 0.119662
0x2fa9
// 0.664848
0x3952
// 0.641369
0x3922
// 0.414986
0x36a4
// 0.824391
0x3a98
// 0.404576
0x3679
// 0.283363
0x3489
// 0.467747
0x377c
// 0.467828
0x377c
// 0.140330
0x307e
// 0.402280
0x3670
// 1.003391
0x3c03
// 0.144475
0x30a0
// 0.461048
0x3760
// 0.468613
0x377f
// 0.609266
0x38e0
// 0.560596
0x387c
// 0.410914
0x3693
// 0.196930
0x324d
// 0.256623
0x341b
// 0.435163
0x36f6
// 0.324492
0x3531
// 0.371618
0x35f2
// 0.218667
0x32ff
// 0.324592
0x3532
// 0.095539
0x2e1d
// 0.268452
0x344c
// 0.399025
0x3662
// 0.410560
0x3692
// 0.535278
0x3848
// 0.300637
0x34cf
// 0.413966
0x36a0
// 0.263911
0x3439
// 0.227363
0x3347
// 0.245900
0x33de
// 0.444928
0x371e
// 0.486777
0x37ca
// 0.388128
0x3636
// 0.303929
0x34dd
// 0.320661
0x3521
// 0.225175
0x3335
// 0.817364
0x3a8a
// 0.479373
0x37ac
// 0.857010
0x3adb
// 0.356348
0x35b4
// 0.134285
0x304c
// 0.324278
0x3530
// 0.568790
0x388d
// 0.302070
0x34d5
// 0.224479
0x332f
// 0.394179
0x364f
// 0.577446
0x389f
// 0.250274
0x3401
// 0.531677
0x3841
// 0.424735
0x36cc
// 0.653466
0x393a
// 0.316197
0x350f
// 0.227789
0x334a
// 0.184250
0x31e5
// 0.090642
0x2dcd
// 0.462727
0x3767
// 0.344118
0x3582
// 0.245310
0x33da
// 0.100303
0x2e6b
// 0.447675
0x372a
// 0.691568
0x3988
// 0.363492
0x35d1
// 0.459596
0x375b
// 0.559206
0x3879
// 0.119232
0x2fa1
// 0.691377
0x3988
// 0.423896
0x36c8
// 0.092387
0x2dea
// 0.016503
0x243a
// 0.389855
0x363d
// 0.310975
0x34fa
// 0.508563
0x3812
// 0.446708
0x3726
// 0.292022
0x34ac
// 0.280445
0x347d
// 0.371688
0x35f2
// 0.231185
0x3366
// 0.468586
0x377f
// 0.037421
0x28ca
// 0.483864
0x37be
// 0.434319
0x36f3
// 0.164206
0x3141
// 0.584744
0x38ae
// 0.264999
0x343d
// 0.371643
0x35f2
// 0.662050
0x394c
// 0.566702
0x3889
// 0.496748
0x37f3
// 0.529369
0x383c
// 0.397883
0x365e
// 0.503458
0x3807
// 0.154150
0x30ef
// 0.435808
0x36f9
// 0.939636
0x3b84
// 0.347287
0x358e
// 0.202447
0x327a
// 0.834662
0x3aad
// 0.522830
0x382f
// 0.539743
0x3851
// 0.390797
0x3641
// 0.269937
0x3452
// 0.528511
0x383a
// 0.142120
0x308c
// 0.507329
0x380f
// 0.206810
0x329e
// 0.793935
0x3a5a
// 0.835657
0x3aaf
// 0.670280
0x395d
// 0.113984
0x2f4c
// 0.472458
0x378f
// 0.575082
0x389a
// 0.732740
0x39dd
// 0.591795
0x38bc
// 0.546775
0x3860
// 0.296527
0x34bf
// 0.261418
0x342f
// 0.571263
0x3892
// 0.589192
0x38b7
// 0.822382
0x3a94
// 0.361338
0x35c8
// 0.153735
0x30eb
// 0.466681
0x3778
// 0.393489
0x364c
// 0.721908
0x39c6
// 0.086062
0x2d82
// 0.331373
0x354d
// 0.621595
0x38f9
// 0.516218
0x3821
// 0.771446
0x3a2c
// 0.420211
0x36b9
// 0.422246
0x36c2
// 0.338301
0x356a
// 0.745863
0x39f8
// 0.650429
0x3934
// 0.550667
0x3868
// 0.412579
0x369a
// 0.288576
0x349e
// 0.353537
0x35a8
// 0.225252
0x3335
// 0.194284
0x3238
// 0.547930
0x3862
// 0.300970
0x34d1
// 0.504146
0x3808
// 1.026896
0x3c1c
// 0.418343
0x36b2
// 0.627205
0x3905
// 0.230780
0x3363
// 0.145178
0x30a5
// 0.384564
0x3627
// 0.278455
0x3475
// 0.341747
0x3578
// 0.448953
0x372f
// 0.233506
0x3379
// 0.229044
0x3354
// 0.261151
0x342e
// 0.582315
0x38a9
// 0.214435
0x32dd
// 0.195282
0x3240
// 0.422011
0x36c1
// 1.204601
0x3cd2
// 0.681809
0x3974
// 0.108645
0x2ef4
// 0.054477
0x2af9
// 0.269794
0x3451
// 0.138462
0x306e
// 0.381523
0x361b
// 0.225135
0x3334
// 0.204132
0x3288
// 0.285641
0x3492
// 0.521935
0x382d
// 0.670786
0x395e
// 0.601863
0x38d1
// 0.117244
0x2f81
// 0.804322
0x3a6f
// 0.522114
0x382d
// 0.456285
0x374d
// 0.325889
0x3537
// 0.052211
0x2aaf
// 0.471914
0x378d
// 0.620330
0x38f6
// 0.183729
0x31e1
// 0.612658
0x38e7
// 0.364133
0x35d3
// 0.520880
0x382b
// 0.693829
0x398d
// 0.146768
0x30b2
// 0.350727
0x359d
// 0.572416
0x3894
// 0.145950
0x30ac
// 0.659216
0x3946
// 0.232441
0x3370
// 0.365908
0x35db
// 0.533640
0x3845
// 0.578456
0x38a1
// 0.166925
0x3157
// 0.439703
0x3709
// 0.427486
0x36d7
// 0.461784
0x3763
// 0.292177
0x34ad
// 0.461312
0x3762
// 0.543533
0x3859
// 0.435610
0x36f8
// 0.237356
0x3398
// 0.366056
0x35db
// 0.196488
0x324a
// 0.022935
0x25df
// 0.359284
0x35c0
// 0.409479
0x368d
// 0.228371
0x334f
// 0.124286
0x2ff4
// 0.434761
0x36f5
// 0.629995
0x390a

@ -0,0 +1,514 @@
H
256
// 0.044799
0x29bc
// 0.026955
0x26e7
// 0.155308
0x30f8
// 0.700622
0x399b
// 0.041226
0x2947
// 0.353181
0x35a7
// 0.180997
0x31cb
// 0.479153
0x37ab
// 0.229349
0x3357
// 0.534623
0x3847
// 0.052428
0x2ab6
// 0.179176
0x31bc
// 0.472553
0x3790
// 0.406630
0x3682
// 0.160454
0x3122
// 0.081168
0x2d32
// 0.641322
0x3921
// 0.281874
0x3483
// 0.808254
0x3a77
// 0.004561
0x1cac
// 0.175304
0x319c
// 0.178098
0x31b3
// 0.360140
0x35c3
// 0.009342
0x20c8
// 0.146573
0x30b1
// 0.306255
0x34e6
// 0.012054
0x222c
// 0.548488
0x3863
// 0.690467
0x3986
// 0.181983
0x31d3
// 0.079159
0x2d11
// 0.073247
0x2cb0
// 0.125462
0x3004
// 1.095754
0x3c62
// 0.879620
0x3b09
// 0.226672
0x3341
// 0.136860
0x3061
// 0.112116
0x2f2d
// 0.102075
0x2e88
// 0.014319
0x2355
// 0.442023
0x3713
// 0.411355
0x3695
// 0.172214
0x3183
// 0.679620
0x3970
// 0.163681
0x313d
// 0.080295
0x2d24
// 0.218787
0x3300
// 0.218863
0x3301
// 0.019693
0x250b
// 0.161829
0x312e
// 1.006794
0x3c07
// 0.020873
0x2558
// 0.212565
0x32cd
// 0.219598
0x3307
// 0.371205
0x35f0
// 0.314268
0x3507
// 0.168850
0x3167
// 0.038782
0x28f7
// 0.065855
0x2c37
// 0.189367
0x320f
// 0.105295
0x2ebd
// 0.138100
0x306b
// 0.047815
0x2a1f
// 0.105360
0x2ebe
// 0.009128
0x20ac
// 0.072067
0x2c9d
// 0.159221
0x3118
// 0.168559
0x3165
// 0.286522
0x3496
// 0.090382
0x2dc9
// 0.171368
0x317c
// 0.069649
0x2c75
// 0.051694
0x2a9e
// 0.060467
0x2bbd
// 0.197961
0x3256
// 0.236952
0x3395
// 0.150644
0x30d2
// 0.092373
0x2de9
// 0.102823
0x2e95
// 0.050704
0x2a7d
// 0.668084
0x3958
// 0.229798
0x335b
// 0.734466
0x39e0
// 0.126984
0x3010
// 0.018033
0x249e
// 0.105157
0x2ebb
// 0.323522
0x352d
// 0.091246
0x2dd7
// 0.050391
0x2a73
// 0.155377
0x30f9
// 0.333444
0x3556
// 0.062637
0x2c02
// 0.282681
0x3486
// 0.180400
0x31c6
// 0.427018
0x36d5
// 0.099980
0x2e66
// 0.051888
0x2aa4
// 0.033948
0x2858
// 0.008216
0x2035
// 0.214116
0x32da
// 0.118417
0x2f94
// 0.060177
0x2bb4
// 0.010061
0x2127
// 0.200412
0x326a
// 0.478267
0x37a7
// 0.132127
0x303a
// 0.211228
0x32c2
// 0.312711
0x3501
// 0.014216
0x2347
// 0.478003
0x37a6
// 0.179688
0x31c0
// 0.008535
0x205f
// 0.000272
0xc76
// 0.151987
0x30dd
// 0.096705
0x2e30
// 0.258636
0x3423
// 0.199548
0x3263
// 0.085277
0x2d75
// 0.078650
0x2d09
// 0.138152
0x306c
// 0.053447
0x2ad7
// 0.219573
0x3307
// 0.001400
0x15bc
// 0.234124
0x337e
// 0.188633
0x3209
// 0.026963
0x26e7
// 0.341926
0x3579
// 0.070225
0x2c7f
// 0.138118
0x306b
// 0.438310
0x3703
// 0.321151
0x3523
// 0.246759
0x33e5
// 0.280232
0x347c
// 0.158311
0x3111
// 0.253470
0x340e
// 0.023762
0x2615
// 0.189928
0x3214
// 0.882917
0x3b10
// 0.120608
0x2fb8
// 0.040985
0x293f
// 0.696661
0x3993
// 0.273351
0x3460
// 0.291322
0x34a9
// 0.152722
0x30e3
// 0.072866
0x2caa
// 0.279324
0x3478
// 0.020198
0x252c
// 0.257383
0x341e
// 0.042770
0x2979
// 0.630333
0x390b
// 0.698322
0x3996
// 0.449275
0x3730
// 0.012992
0x22a7
// 0.223217
0x3325
// 0.330719
0x354b
// 0.536907
0x384c
// 0.350221
0x359b
// 0.298963
0x34c9
// 0.087928
0x2da1
// 0.068340
0x2c60
// 0.326342
0x3539
// 0.347147
0x358e
// 0.676312
0x3969
// 0.130565
0x302e
// 0.023634
0x260d
// 0.217791
0x32f8
// 0.154834
0x30f4
// 0.521151
0x382b
// 0.007407
0x1f96
// 0.109808
0x2f07
// 0.386380
0x362f
// 0.266481
0x3444
// 0.595129
0x38c3
// 0.176577
0x31a7
// 0.178291
0x31b5
// 0.114447
0x2f53
// 0.556312
0x3873
// 0.423058
0x36c5
// 0.303234
0x34da
// 0.170221
0x3172
// 0.083276
0x2d54
// 0.124988
0x3000
// 0.050738
0x2a7f
// 0.037746
0x28d5
// 0.300227
0x34ce
// 0.090583
0x2dcc
// 0.254164
0x3411
// 1.054516
0x3c38
// 0.175011
0x319a
// 0.393386
0x364b
// 0.053260
0x2ad1
// 0.021077
0x2565
// 0.147889
0x30bc
// 0.077537
0x2cf6
// 0.116791
0x2f7a
// 0.201558
0x3273
// 0.054525
0x2afb
// 0.052461
0x2ab7
// 0.068200
0x2c5d
// 0.339091
0x356d
// 0.045982
0x29e3
// 0.038135
0x28e2
// 0.178093
0x31b3
// 1.451062
0x3dce
// 0.464864
0x3770
// 0.011804
0x220b
// 0.002968
0x1a14
// 0.072789
0x2ca9
// 0.019172
0x24e8
// 0.145560
0x30a8
// 0.050686
0x2a7d
// 0.041670
0x2955
// 0.081591
0x2d39
// 0.272416
0x345c
// 0.449953
0x3733
// 0.362239
0x35cc
// 0.013746
0x230a
// 0.646934
0x392d
// 0.272603
0x345d
// 0.208196
0x32aa
// 0.106203
0x2ecc
// 0.002726
0x1995
// 0.222703
0x3320
// 0.384809
0x3628
// 0.033756
0x2852
// 0.375349
0x3601
// 0.132593
0x303e
// 0.271316
0x3457
// 0.481399
0x37b4
// 0.021541
0x2584
// 0.123010
0x2fdf
// 0.327660
0x353e
// 0.021302
0x2574
// 0.434565
0x36f4
// 0.054029
0x2aea
// 0.133889
0x3049
// 0.284771
0x348e
// 0.334612
0x355b
// 0.027864
0x2722
// 0.193338
0x3230
// 0.182744
0x31d9
// 0.213245
0x32d3
// 0.085367
0x2d77
// 0.212809
0x32cf
// 0.295428
0x34ba
// 0.189756
0x3212
// 0.056338
0x2b36
// 0.133997
0x304a
// 0.038607
0x28f1
// 0.000526
0x104f
// 0.129085
0x3021
// 0.167673
0x315e
// 0.052153
0x2aad
// 0.015447
0x23e9
// 0.189017
0x320c
// 0.396894
0x365a

@ -0,0 +1,6 @@
H
2
// -0.368091
0xb5e4
// -0.861249
0xbae4

@ -0,0 +1,308 @@
#include "ComplexTestsF16.h"
#include <stdio.h>
#include "Error.h"
#define SNR_THRESHOLD 40
#define REL_ERROR (6.0e-2)
void ComplexTestsF16::test_cmplx_conj_f16()
{
const float16_t *inp1=input1.ptr();
float16_t *outp=output.ptr();
arm_cmplx_conj_f16(inp1,outp,input1.nbSamples() >> 1 );
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
}
void ComplexTestsF16::test_cmplx_dot_prod_f16()
{
float16_t re,im;
const float16_t *inp1=input1.ptr();
const float16_t *inp2=input2.ptr();
float16_t *outp=output.ptr();
arm_cmplx_dot_prod_f16(inp1,inp2,input1.nbSamples() >> 1,&re,&im);
outp[0] = re;
outp[1] = im;
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
ASSERT_EMPTY_TAIL(output);
}
void ComplexTestsF16::test_cmplx_mag_f16()
{
const float16_t *inp1=input1.ptr();
float16_t *outp=output.ptr();
arm_cmplx_mag_f16(inp1,outp,input1.nbSamples() >> 1 );
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
}
void ComplexTestsF16::test_cmplx_mag_squared_f16()
{
const float16_t *inp1=input1.ptr();
float16_t *outp=output.ptr();
arm_cmplx_mag_squared_f16(inp1,outp,input1.nbSamples() >> 1 );
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
}
void ComplexTestsF16::test_cmplx_mult_cmplx_f16()
{
const float16_t *inp1=input1.ptr();
const float16_t *inp2=input2.ptr();
float16_t *outp=output.ptr();
arm_cmplx_mult_cmplx_f16(inp1,inp2,outp,input1.nbSamples() >> 1 );
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
}
void ComplexTestsF16::test_cmplx_mult_real_f16()
{
const float16_t *inp1=input1.ptr();
const float16_t *inp2=input2.ptr();
float16_t *outp=output.ptr();
arm_cmplx_mult_real_f16(inp1,inp2,outp,input1.nbSamples() >> 1 );
ASSERT_EMPTY_TAIL(output);
ASSERT_SNR(output,ref,(float16_t)SNR_THRESHOLD);
ASSERT_REL_ERROR(output,ref,REL_ERROR);
}
/*
 * Prepares the patterns for the test identified by id.
 *
 * For every known test the sequence is always the same:
 *   1. reload the reference pattern,
 *   2. reload input1 (2 * nb values, i.e. nb complex samples),
 *   3. reload input2 when the function under test takes a second vector,
 *   4. create the output buffer with as many samples as the reference.
 * nb is 7, 16 or 23 for the short tests and MAX_NB_SAMPLES for the long ones.
 * An id that matches no test leaves every pattern untouched.
 */
void ComplexTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr)
{
    Testing::nbSamples_t nb = MAX_NB_SAMPLES;
    bool knownTest = true;

    (void)params;

    /* Phase 1: number of complex samples used by the test. */
    switch(id)
    {
        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_1:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_4:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_7:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_10:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_13:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_16:
            nb = 7;
            break;

        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_2:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_5:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_8:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_11:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_14:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_17:
            nb = 16;
            break;

        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_3:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_6:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_9:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_12:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_15:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_18:
            nb = 23;
            break;

        default:
            /* Long tests (and unknown ids) keep MAX_NB_SAMPLES. */
            break;
    }

    /* Phase 2: reference pattern. */
    switch(id)
    {
        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_1:
        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_2:
        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_3:
        case ComplexTestsF16::TEST_CMPLX_CONJ_F16_19:
            ref.reload(ComplexTestsF16::REF_CONJ_F16_ID,mgr,nb << 1);
            break;

        /* Each dot-product test has its own reference file, loaded in full. */
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_4:
            ref.reload(ComplexTestsF16::REF_DOT_PROD_3_F16_ID,mgr);
            break;
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_5:
            ref.reload(ComplexTestsF16::REF_DOT_PROD_4N_F16_ID,mgr);
            break;
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_6:
            ref.reload(ComplexTestsF16::REF_DOT_PROD_4N1_F16_ID,mgr);
            break;
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_20:
            ref.reload(ComplexTestsF16::REF_DOT_PROD_LONG_F16_ID,mgr);
            break;

        /* Magnitude results are real: nb values, not 2 * nb. */
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_7:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_8:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_9:
        case ComplexTestsF16::TEST_CMPLX_MAG_F16_21:
            ref.reload(ComplexTestsF16::REF_MAG_F16_ID,mgr,nb);
            break;

        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_10:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_11:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_12:
        case ComplexTestsF16::TEST_CMPLX_MAG_SQUARED_F16_22:
            ref.reload(ComplexTestsF16::REF_MAG_SQUARED_F16_ID,mgr,nb);
            break;

        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_13:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_14:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_15:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_23:
            ref.reload(ComplexTestsF16::REF_CMPLX_MULT_CMPLX_F16_ID,mgr,nb << 1);
            break;

        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_16:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_17:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_18:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_24:
            ref.reload(ComplexTestsF16::REF_CMPLX_MULT_REAL_F16_ID,mgr,nb << 1);
            break;

        default:
            knownTest = false;
            break;
    }

    /* Nothing is loaded or created for an id that matches no test. */
    if (!knownTest)
    {
        return;
    }

    /* Phase 3: inputs. Every test reads nb complex samples from input1. */
    input1.reload(ComplexTestsF16::INPUT1_F16_ID,mgr,nb << 1);

    switch(id)
    {
        /* Second operand is a complex vector. */
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_4:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_5:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_6:
        case ComplexTestsF16::TEST_CMPLX_DOT_PROD_F16_20:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_13:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_14:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_15:
        case ComplexTestsF16::TEST_CMPLX_MULT_CMPLX_F16_23:
            input2.reload(ComplexTestsF16::INPUT2_F16_ID,mgr,nb << 1);
            break;

        /* Second operand is a real vector: nb values from the third input file. */
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_16:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_17:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_18:
        case ComplexTestsF16::TEST_CMPLX_MULT_REAL_F16_24:
            input2.reload(ComplexTestsF16::INPUT3_F16_ID,mgr,nb);
            break;

        default:
            /* Single-operand functions: no second input. */
            break;
    }

    /* Phase 4: output buffer sized like the reference. */
    output.create(ref.nbSamples(),ComplexTestsF16::OUT_SAMPLES_F16_ID,mgr);
}
/*
 * Tear-down hook of the ComplexTestsF16 suite.
 *
 * Hands the suite's `output` member to the pattern manager by calling
 * output.dump(mgr).
 *
 * id  : identifier of the test being torn down; not used by this function.
 * mgr : pattern manager forwarded unchanged to output.dump().
 */
void ComplexTestsF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
{
    output.dump(mgr);

    /* The test id plays no role in the cleanup; the void cast marks the
       parameter as intentionally unused. */
    (void)id;
}

@ -77,6 +77,69 @@ group Root {
} }
group Complex Tests {
class = ComplexTests
folder = ComplexMaths
suite Complex Tests F16{
class = ComplexTestsF16
folder = ComplexMathsF16
Pattern INPUT1_F16_ID : Input1_f16.txt
Pattern INPUT2_F16_ID : Input2_f16.txt
Pattern INPUT3_F16_ID : Input3_f16.txt
Pattern REF_CONJ_F16_ID : Reference1_f16.txt
Pattern REF_DOT_PROD_3_F16_ID : Reference2_f16.txt
Pattern REF_DOT_PROD_4N_F16_ID : Reference3_f16.txt
Pattern REF_DOT_PROD_4N1_F16_ID : Reference4_f16.txt
Pattern REF_MAG_F16_ID : Reference5_f16.txt
Pattern REF_MAG_SQUARED_F16_ID : Reference6_f16.txt
Pattern REF_CMPLX_MULT_CMPLX_F16_ID : Reference7_f16.txt
Pattern REF_CMPLX_MULT_REAL_F16_ID : Reference8_f16.txt
Pattern REF_DOT_PROD_LONG_F16_ID : Reference9_f16.txt
Output OUT_SAMPLES_F16_ID : Output
Output OUT_STATE_F16_ID : State
Functions {
Test nb=3 arm_cmplx_conj_f16:test_cmplx_conj_f16
Test nb=4n arm_cmplx_conj_f16:test_cmplx_conj_f16
Test nb=4n+1 arm_cmplx_conj_f16:test_cmplx_conj_f16
Test nb=3 arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
Test nb=4n arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
Test nb=4n+1 arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
Test nb=3 arm_cmplx_mag_f16:test_cmplx_mag_f16
Test nb=4n arm_cmplx_mag_f16:test_cmplx_mag_f16
Test nb=4n+1 arm_cmplx_mag_f16:test_cmplx_mag_f16
Test nb=3 arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
Test nb=4n arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
Test nb=4n+1 arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
Test nb=3 arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
Test nb=4n arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
Test nb=4n+1 arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
Test nb=3 arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
Test nb=4n arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
Test nb=4n+1 arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
Test long arm_cmplx_conj_f16:test_cmplx_conj_f16
Test long arm_cmplx_dot_prod_f16:test_cmplx_dot_prod_f16
Test long arm_cmplx_mag_f16:test_cmplx_mag_f16
Test long arm_cmplx_mag_squared_f16:test_cmplx_mag_squared_f16
Test long arm_cmplx_mult_cmplx_f16:test_cmplx_mult_cmplx_f16
Test long arm_cmplx_mult_real_f16:test_cmplx_mult_real_f16
}
}
}
group Transform Tests { group Transform Tests {
class = TransformTests class = TransformTests
folder = Transform folder = Transform

Loading…
Cancel
Save