You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
CMSIS-DSP/Source/TransformFunctions/arm_cfft_radix2_f16.c

480 lines
13 KiB
C

/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_f16.c
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT half-precision floating-point processing function
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/transform_functions_f16.h"
/**
* @defgroup ComplexFFTDeprecated Deprecated Complex FFT functions
*/
#if defined(ARM_FLOAT16_SUPPORTED)
/* Forward declaration of the forward (FFT) radix-2 butterfly kernel.
   It is defined further down in this file and called by arm_cfft_radix2_f16()
   when the instance's ifftFlag is not 1. */
void arm_radix2_butterfly_f16(
float16_t * pSrc,
uint32_t fftLen,
const float16_t * pCoef,
uint16_t twidCoefModifier);
/* Forward declaration of the inverse (IFFT) radix-2 butterfly kernel.
   It is defined further down in this file; the extra onebyfftLen argument is
   the factor the kernel multiplies into the outputs of its last stage. */
void arm_radix2_butterfly_inverse_f16(
float16_t * pSrc,
uint32_t fftLen,
const float16_t * pCoef,
uint16_t twidCoefModifier,
float16_t onebyfftLen);
/* Bit-reversal reordering routine. Declared extern and not defined in the
   visible part of this file; arm_cfft_radix2_f16() calls it after the
   butterflies when the instance's bitReverseFlag is 1. */
extern void arm_bitreversal_f16(
float16_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
const uint16_t * pBitRevTab);
/**
@ingroup ComplexFFT
*/
/**
@addtogroup ComplexFFTDeprecated
@{
*/
/**
  @brief         In-place radix-2 complex FFT / inverse FFT on half-precision data.
  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future
  @param[in]     S     points to an instance of the floating-point Radix-2 CFFT/CIFFT structure
  @param[in,out] pSrc  points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
  @return        none

  @par           Processing steps
                   The instance's <code>ifftFlag</code> selects the butterfly kernel
                   (inverse when it equals 1, forward otherwise). Afterwards the buffer
                   is passed through the bit-reversal routine, but only when the
                   instance's <code>bitReverseFlag</code> equals 1.
 */
void arm_cfft_radix2_f16(
const arm_cfft_radix2_instance_f16 * S,
float16_t * pSrc)
{
  /* Step 1: run the butterfly stages in the requested direction. */
  if (S->ifftFlag != 1U)
  {
    /* Forward transform */
    arm_radix2_butterfly_f16(pSrc, S->fftLen, S->pTwiddle,
                             S->twidCoefModifier);
  }
  else
  {
    /* Inverse transform; the kernel also receives the instance's onebyfftLen factor */
    arm_radix2_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle,
                                     S->twidCoefModifier, S->onebyfftLen);
  }

  /* Step 2: optional reordering. Nothing more to do unless it was requested. */
  if (S->bitReverseFlag != 1U)
  {
    return;
  }

  arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
/**
@} end of ComplexFFTDeprecated group
*/
/* ----------------------------------------------------------------------
** Internal helper function used by the FFTs
** ------------------------------------------------------------------- */
/*
* @brief Single radix-2 butterfly with forward twiddle rotation (file-local helper).
* @param[in, out] pTop    points to the {real, imag} pair of the upper butterfly input.
* @param[in, out] pBot    points to the {real, imag} pair of the lower butterfly input.
* @param[in]      cosVal  first value of the twiddle pair.
* @param[in]      sinVal  second value of the twiddle pair.
* @return none.
*
* On return the upper pair holds (top + bot) and the lower pair holds
* (top - bot) multiplied by (cosVal - j*sinVal). All four inputs are read
* before anything is written, and every intermediate result is stored in a
* float16_t, exactly as in the open-coded butterflies this helper replaces.
*/
static inline void arm_radix2_dif_bfly_f16(
  float16_t * pTop,
  float16_t * pBot,
  float16_t cosVal,
  float16_t sinVal)
{
  float16_t xt, yt;
  float16_t p0, p1, p2, p3;
  float16_t a0, a1;

  /* Sum and difference of the two complex inputs */
  a0 = (_Float16)pTop[0] + (_Float16)pBot[0];
  xt = (_Float16)pTop[0] - (_Float16)pBot[0];

  yt = (_Float16)pTop[1] - (_Float16)pBot[1];
  a1 = (_Float16)pBot[1] + (_Float16)pTop[1];

  /* Partial products of the difference with the twiddle pair */
  p0 = (_Float16)xt * (_Float16)cosVal;
  p1 = (_Float16)yt * (_Float16)sinVal;
  p2 = (_Float16)yt * (_Float16)cosVal;
  p3 = (_Float16)xt * (_Float16)sinVal;

  pTop[0] = a0;
  pTop[1] = a1;

  pBot[0] = (_Float16)p0 + (_Float16)p1;
  pBot[1] = (_Float16)p2 - (_Float16)p3;
}

/*
* @brief Core function for the floating-point CFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to the twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*
* pSrc is accessed as interleaved {real, imag} pairs and pCoef as interleaved
* {cosVal, sinVal} pairs. In every stage the distance between the two
* butterfly inputs (n2) is halved and the twiddle stride (twidCoefModifier)
* is doubled. No bit reversal is performed here.
*
* With ARM_MATH_DSP defined, the first stage (one twiddle per butterfly) and
* the last stage (adjacent samples, no twiddle multiplication) are peeled out
* of the generic stage loop; otherwise one generic loop runs every stage.
*/
void arm_radix2_butterfly_f16(
float16_t * pSrc,
uint32_t fftLen,
const float16_t * pCoef,
uint16_t twidCoefModifier)
{
  uint32_t i, j, k;
  uint32_t n1, n2, ia;
  float16_t cosVal, sinVal;

#if defined (ARM_MATH_DSP)

  /* ---- First stage ---- */
  n2 = fftLen >> 1;
  ia = 0;

  /* Butterfly inputs are pSrc[i] and pSrc[i + fftLen/2] (complex indices) */
  for (i = 0; i < n2; i++)
  {
    cosVal = pCoef[ia * 2];
    sinVal = pCoef[(ia * 2) + 1];

    /* Twiddle coefficients index modifier */
    ia += twidCoefModifier;

    arm_radix2_dif_bfly_f16(&pSrc[2 * i], &pSrc[2 * (i + n2)], cosVal, sinVal);
  }

  twidCoefModifier <<= 1U;

  /* ---- Middle stages ---- */
  for (k = n2; k > 2; k = k >> 1)
  {
    n1 = n2;
    n2 = n2 >> 1;
    ia = 0;

    /* One group per twiddle factor. k > 2 guarantees 1 <= n2 < fftLen here,
       so both loops below execute at least once. */
    for (j = 0; j < n2; j++)
    {
      cosVal = pCoef[ia * 2];
      sinVal = pCoef[(ia * 2) + 1];
      ia += twidCoefModifier;

      /* All butterflies that share this twiddle factor, n1 samples apart */
      for (i = j; i < fftLen; i += n1)
      {
        arm_radix2_dif_bfly_f16(&pSrc[2 * i], &pSrc[2 * (i + n2)], cosVal, sinVal);
      }
    }

    twidCoefModifier <<= 1U;
  }

  /* ---- Last stage: adjacent complex samples, sum and difference only ---- */
  for (i = 0; i < fftLen; i += 2)
  {
    float16_t *pPair = &pSrc[2 * i];
    float16_t a0, a1, xt, yt;

    a0 = (_Float16)pPair[0] + (_Float16)pPair[2];
    xt = (_Float16)pPair[0] - (_Float16)pPair[2];

    yt = (_Float16)pPair[1] - (_Float16)pPair[3];
    a1 = (_Float16)pPair[3] + (_Float16)pPair[1];

    pPair[0] = a0;
    pPair[1] = a1;
    pPair[2] = xt;
    pPair[3] = yt;
  }

#else

  n2 = fftLen;

  /* ---- All stages ---- */
  for (k = fftLen; k > 1; k = k >> 1)
  {
    n1 = n2;
    n2 = n2 >> 1;
    ia = 0;

    /* One group per twiddle factor. k > 1 guarantees 1 <= n2 < fftLen here,
       so both loops below execute at least once. */
    for (j = 0; j < n2; j++)
    {
      cosVal = pCoef[ia * 2];
      sinVal = pCoef[(ia * 2) + 1];
      ia += twidCoefModifier;

      /* All butterflies that share this twiddle factor, n1 samples apart */
      for (i = j; i < fftLen; i += n1)
      {
        arm_radix2_dif_bfly_f16(&pSrc[2 * i], &pSrc[2 * (i + n2)], cosVal, sinVal);
      }
    }

    twidCoefModifier <<= 1U;
  }

#endif // #if defined (ARM_MATH_DSP)
}
/*
* @brief Single radix-2 butterfly with inverse twiddle rotation (file-local helper).
* @param[in, out] pTop    points to the {real, imag} pair of the upper butterfly input.
* @param[in, out] pBot    points to the {real, imag} pair of the lower butterfly input.
* @param[in]      cosVal  first value of the twiddle pair.
* @param[in]      sinVal  second value of the twiddle pair.
* @return none.
*
* On return the upper pair holds (top + bot) and the lower pair holds
* (top - bot) multiplied by (cosVal + j*sinVal). All four inputs are read
* before anything is written, and every intermediate result is stored in a
* float16_t, exactly as in the open-coded butterflies this helper replaces.
*/
static inline void arm_radix2_dif_bfly_inv_f16(
  float16_t * pTop,
  float16_t * pBot,
  float16_t cosVal,
  float16_t sinVal)
{
  float16_t xt, yt;
  float16_t p0, p1, p2, p3;
  float16_t a0, a1;

  /* Sum and difference of the two complex inputs */
  a0 = (_Float16)pTop[0] + (_Float16)pBot[0];
  xt = (_Float16)pTop[0] - (_Float16)pBot[0];

  yt = (_Float16)pTop[1] - (_Float16)pBot[1];
  a1 = (_Float16)pBot[1] + (_Float16)pTop[1];

  /* Partial products of the difference with the twiddle pair */
  p0 = (_Float16)xt * (_Float16)cosVal;
  p1 = (_Float16)yt * (_Float16)sinVal;
  p2 = (_Float16)yt * (_Float16)cosVal;
  p3 = (_Float16)xt * (_Float16)sinVal;

  pTop[0] = a0;
  pTop[1] = a1;

  pBot[0] = (_Float16)p0 - (_Float16)p1;
  pBot[1] = (_Float16)p2 + (_Float16)p3;
}

/*
* @brief Twiddle-free radix-2 butterfly whose outputs are scaled (file-local helper).
* @param[in, out] pTop         points to the {real, imag} pair of the upper butterfly input.
* @param[in, out] pBot         points to the {real, imag} pair of the lower butterfly input.
* @param[in]      onebyfftLen  factor multiplied into all four outputs.
* @return none.
*
* On return the upper pair holds (top + bot) * onebyfftLen and the lower pair
* holds (top - bot) * onebyfftLen. All inputs are read before anything is written.
*/
static inline void arm_radix2_bfly_scaled_f16(
  float16_t * pTop,
  float16_t * pBot,
  float16_t onebyfftLen)
{
  float16_t xt, yt;
  float16_t p0, p1, p2, p3;
  float16_t a0, a1;

  a0 = (_Float16)pTop[0] + (_Float16)pBot[0];
  xt = (_Float16)pTop[0] - (_Float16)pBot[0];

  a1 = (_Float16)pBot[1] + (_Float16)pTop[1];
  yt = (_Float16)pTop[1] - (_Float16)pBot[1];

  p0 = (_Float16)a0 * (_Float16)onebyfftLen;
  p2 = (_Float16)xt * (_Float16)onebyfftLen;

  p1 = (_Float16)a1 * (_Float16)onebyfftLen;
  p3 = (_Float16)yt * (_Float16)onebyfftLen;

  pTop[0] = p0;
  pTop[1] = p1;
  pBot[0] = p2;
  pBot[1] = p3;
}

/*
* @brief Core function for the floating-point CIFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to the twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @param[in] onebyfftLen factor multiplied into the outputs of the last stage.
* @return none.
*
* pSrc is accessed as interleaved {real, imag} pairs and pCoef as interleaved
* {cosVal, sinVal} pairs. In every stage the distance between the two
* butterfly inputs (n2) is halved and the twiddle stride (twidCoefModifier)
* is doubled. The last stage is always handled separately because it performs
* no twiddle multiplication and instead scales its outputs by onebyfftLen.
* No bit reversal is performed here.
*
* With ARM_MATH_DSP defined, the first stage (one twiddle per butterfly) is
* additionally peeled out of the generic stage loop.
*/
void arm_radix2_butterfly_inverse_f16(
float16_t * pSrc,
uint32_t fftLen,
const float16_t * pCoef,
uint16_t twidCoefModifier,
float16_t onebyfftLen)
{
  uint32_t i, j, k;
  uint32_t n1, n2, ia;
  float16_t cosVal, sinVal;

#if defined (ARM_MATH_DSP)

  /* ---- First stage ---- */
  n2 = fftLen >> 1;
  ia = 0;

  /* Butterfly inputs are pSrc[i] and pSrc[i + fftLen/2] (complex indices) */
  for (i = 0; i < n2; i++)
  {
    cosVal = pCoef[ia * 2];
    sinVal = pCoef[(ia * 2) + 1];
    ia += twidCoefModifier;

    arm_radix2_dif_bfly_inv_f16(&pSrc[2 * i], &pSrc[2 * (i + n2)], cosVal, sinVal);
  }

  twidCoefModifier <<= 1U;

  /* ---- Middle stages ---- */
  for (k = fftLen / 2; k > 2; k = k >> 1)
  {
    n1 = n2;
    n2 = n2 >> 1;
    ia = 0;

    /* One group per twiddle factor. k > 2 guarantees 1 <= n2 < fftLen here,
       so both loops below execute at least once. */
    for (j = 0; j < n2; j++)
    {
      cosVal = pCoef[ia * 2];
      sinVal = pCoef[(ia * 2) + 1];
      ia += twidCoefModifier;

      /* All butterflies that share this twiddle factor, n1 samples apart */
      for (i = j; i < fftLen; i += n1)
      {
        arm_radix2_dif_bfly_inv_f16(&pSrc[2 * i], &pSrc[2 * (i + n2)], cosVal, sinVal);
      }
    }

    twidCoefModifier <<= 1U;
  }

  /* ---- Last stage: adjacent complex samples, scaled by onebyfftLen ---- */
  for (i = 0; i < fftLen; i += 2)
  {
    arm_radix2_bfly_scaled_f16(&pSrc[2 * i], &pSrc[2 * i + 2], onebyfftLen);
  }

#else

  n2 = fftLen;

  /* ---- All stages except the last ---- */
  for (k = fftLen; k > 2; k = k >> 1)
  {
    n1 = n2;
    n2 = n2 >> 1;
    ia = 0;

    /* One group per twiddle factor. k > 2 guarantees 1 <= n2 < fftLen here,
       so both loops below execute at least once. */
    for (j = 0; j < n2; j++)
    {
      cosVal = pCoef[ia * 2];
      sinVal = pCoef[(ia * 2) + 1];
      ia = ia + twidCoefModifier;

      /* All butterflies that share this twiddle factor, n1 samples apart */
      for (i = j; i < fftLen; i += n1)
      {
        arm_radix2_dif_bfly_inv_f16(&pSrc[2 * i], &pSrc[2 * (i + n2)], cosVal, sinVal);
      }
    }

    twidCoefModifier = twidCoefModifier << 1U;
  }

  /* ---- Last stage: scaled by onebyfftLen ---- */
  n1 = n2;
  n2 = n2 >> 1;

  for (i = 0; i < fftLen; i += n1)
  {
    arm_radix2_bfly_scaled_f16(&pSrc[2 * i], &pSrc[2 * (i + n2)], onebyfftLen);
  }

#endif // #if defined (ARM_MATH_DSP)
}
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */