You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
311 lines
10 KiB
C
311 lines
10 KiB
C
/******************************************************************************
|
|
* @file arm_math_f16.h
|
|
* @brief Public header file for f16 function of the CMSIS DSP Library
|
|
* @version V1.8.1
|
|
* @date 20. April 2020
|
|
******************************************************************************/
|
|
/*
|
|
* Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
* not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef _ARM_MATH_F16_H
|
|
#define _ARM_MATH_F16_H
|
|
|
|
#include "arm_math.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C"
|
|
{
|
|
#endif
|
|
|
|
#if !defined( __CC_ARM )
|
|
|
|
/**
|
|
* @brief 16-bit floating-point type definition.
|
|
* This is already defined in arm_mve.h
|
|
*
|
|
* This is not fully supported on ARM AC5.
|
|
*/
|
|
|
|
/*
|
|
|
|
Check if the type __fp16 is available.
|
|
If it is not available, f16 version of the kernels
|
|
won't be built.
|
|
|
|
*/
|
|
#if !(__ARM_FEATURE_MVE & 2) && !(__ARM_NEON)
|
|
#if defined(__ARM_FP16_FORMAT_IEEE) || defined(__ARM_FP16_FORMAT_ALTERNATIVE)
|
|
typedef __fp16 float16_t;
|
|
#define ARM_FLOAT16_SUPPORTED
|
|
#endif
|
|
#else
|
|
#define ARM_FLOAT16_SUPPORTED
|
|
#endif
|
|
|
|
#if defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF) /* floating point vector*/
|
|
|
|
#if defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_NEON_FLOAT16)
|
|
/**
|
|
* @brief 16-bit floating-point 128-bit vector data type
|
|
*/
|
|
typedef __ALIGNED(2) float16x8_t f16x8_t;
|
|
|
|
/**
|
|
* @brief 16-bit floating-point 128-bit vector pair data type
|
|
*/
|
|
typedef float16x8x2_t f16x8x2_t;
|
|
|
|
/**
|
|
* @brief 16-bit floating-point 128-bit vector quadruplet data type
|
|
*/
|
|
typedef float16x8x4_t f16x8x4_t;
|
|
|
|
/**
|
|
* @brief 16-bit ubiquitous 128-bit vector data type
|
|
*/
|
|
typedef union _any16x8_t
|
|
{
|
|
float16x8_t f;
|
|
int16x8_t i;
|
|
} any16x8_t;
|
|
#endif
|
|
|
|
#endif
|
|
|
|
#if defined(ARM_MATH_NEON)
|
|
|
|
|
|
#if defined(ARM_MATH_NEON_FLOAT16)
|
|
/**
|
|
* @brief 16-bit float 64-bit vector data type.
|
|
*/
|
|
typedef __ALIGNED(2) float16x4_t f16x4_t;
|
|
|
|
/**
|
|
* @brief 16-bit floating-point 128-bit vector triplet data type
|
|
*/
|
|
typedef float16x8x3_t f16x8x3_t;
|
|
|
|
/**
|
|
* @brief 16-bit floating-point 64-bit vector pair data type
|
|
*/
|
|
typedef float16x4x2_t f16x4x2_t;
|
|
|
|
/**
|
|
* @brief 16-bit floating-point 64-bit vector triplet data type
|
|
*/
|
|
typedef float16x4x3_t f16x4x3_t;
|
|
|
|
/**
|
|
* @brief 16-bit floating-point 64-bit vector quadruplet data type
|
|
*/
|
|
typedef float16x4x4_t f16x4x4_t;
|
|
|
|
/**
|
|
* @brief 16-bit ubiquitous 64-bit vector data type
|
|
*/
|
|
typedef union _any16x4_t
|
|
{
|
|
float16x4_t f;
|
|
int16x4_t i;
|
|
} any16x4_t;
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(ARM_FLOAT16_SUPPORTED)
|
|
#define F16_MAX ((float16_t)FLT_MAX)
|
|
#define F16_MIN (-(float16_t)FLT_MAX)
|
|
|
|
#define F16_ABSMAX ((float16_t)FLT_MAX)
|
|
#define F16_ABSMIN ((float16_t)0.0)
|
|
|
|
/**
|
|
* @brief Floating-point vector addition.
|
|
* @param[in] pSrcA points to the first input vector
|
|
* @param[in] pSrcB points to the second input vector
|
|
* @param[out] pDst points to the output vector
|
|
* @param[in] blockSize number of samples in each vector
|
|
*/
|
|
void arm_add_f16(
|
|
const float16_t * pSrcA,
|
|
const float16_t * pSrcB,
|
|
float16_t * pDst,
|
|
uint32_t blockSize);
|
|
|
|
/**
|
|
* @brief Floating-point vector subtraction.
|
|
* @param[in] pSrcA points to the first input vector
|
|
* @param[in] pSrcB points to the second input vector
|
|
* @param[out] pDst points to the output vector
|
|
* @param[in] blockSize number of samples in each vector
|
|
*/
|
|
void arm_sub_f16(
|
|
const float16_t * pSrcA,
|
|
const float16_t * pSrcB,
|
|
float16_t * pDst,
|
|
uint32_t blockSize);
|
|
|
|
/**
|
|
* @brief Multiplies a floating-point vector by a scalar.
|
|
* @param[in] pSrc points to the input vector
|
|
* @param[in] scale scale factor to be applied
|
|
* @param[out] pDst points to the output vector
|
|
* @param[in] blockSize number of samples in the vector
|
|
*/
|
|
void arm_scale_f16(
|
|
const float16_t * pSrc,
|
|
float16_t scale,
|
|
float16_t * pDst,
|
|
uint32_t blockSize);
|
|
|
|
/**
|
|
* @brief Floating-point vector absolute value.
|
|
* @param[in] pSrc points to the input buffer
|
|
* @param[out] pDst points to the output buffer
|
|
* @param[in] blockSize number of samples in each vector
|
|
*/
|
|
void arm_abs_f16(
|
|
const float16_t * pSrc,
|
|
float16_t * pDst,
|
|
uint32_t blockSize);
|
|
|
|
|
|
/**
|
|
* @brief Adds a constant offset to a floating-point vector.
|
|
* @param[in] pSrc points to the input vector
|
|
* @param[in] offset is the offset to be added
|
|
* @param[out] pDst points to the output vector
|
|
* @param[in] blockSize number of samples in the vector
|
|
*/
|
|
void arm_offset_f16(
|
|
const float16_t * pSrc,
|
|
float16_t offset,
|
|
float16_t * pDst,
|
|
uint32_t blockSize);
|
|
|
|
/**
|
|
* @brief Dot product of floating-point vectors.
|
|
* @param[in] pSrcA points to the first input vector
|
|
* @param[in] pSrcB points to the second input vector
|
|
* @param[in] blockSize number of samples in each vector
|
|
* @param[out] result output result returned here
|
|
*/
|
|
void arm_dot_prod_f16(
|
|
const float16_t * pSrcA,
|
|
const float16_t * pSrcB,
|
|
uint32_t blockSize,
|
|
float16_t * result);
|
|
|
|
/**
|
|
* @brief Floating-point vector multiplication.
|
|
* @param[in] pSrcA points to the first input vector
|
|
* @param[in] pSrcB points to the second input vector
|
|
* @param[out] pDst points to the output vector
|
|
* @param[in] blockSize number of samples in each vector
|
|
*/
|
|
void arm_mult_f16(
|
|
const float16_t * pSrcA,
|
|
const float16_t * pSrcB,
|
|
float16_t * pDst,
|
|
uint32_t blockSize);
|
|
|
|
/**
|
|
* @brief Negates the elements of a floating-point vector.
|
|
* @param[in] pSrc points to the input vector
|
|
* @param[out] pDst points to the output vector
|
|
* @param[in] blockSize number of samples in the vector
|
|
*/
|
|
void arm_negate_f16(
|
|
const float16_t * pSrc,
|
|
float16_t * pDst,
|
|
uint32_t blockSize);
|
|
|
|
/**
|
|
* @brief Instance structure for the floating-point CFFT/CIFFT function.
|
|
*/
|
|
typedef struct
|
|
{
|
|
uint16_t fftLen; /**< length of the FFT. */
|
|
uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
|
|
uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
|
|
const float16_t *pTwiddle; /**< points to the Twiddle factor table. */
|
|
const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
|
|
uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
|
|
uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
|
|
float16_t onebyfftLen; /**< value of 1/fftLen. */
|
|
} arm_cfft_radix2_instance_f16;
|
|
|
|
/**
|
|
* @brief Instance structure for the floating-point CFFT/CIFFT function.
|
|
*/
|
|
typedef struct
|
|
{
|
|
uint16_t fftLen; /**< length of the FFT. */
|
|
uint8_t ifftFlag; /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
|
|
uint8_t bitReverseFlag; /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
|
|
const float16_t *pTwiddle; /**< points to the Twiddle factor table. */
|
|
const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
|
|
uint16_t twidCoefModifier; /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
|
|
uint16_t bitRevFactor; /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
|
|
float16_t onebyfftLen; /**< value of 1/fftLen. */
|
|
} arm_cfft_radix4_instance_f16;
|
|
|
|
/**
|
|
* @brief Instance structure for the floating-point CFFT/CIFFT function.
|
|
*/
|
|
typedef struct
|
|
{
|
|
uint16_t fftLen; /**< length of the FFT. */
|
|
const float16_t *pTwiddle; /**< points to the Twiddle factor table. */
|
|
const uint16_t *pBitRevTable; /**< points to the bit reversal table. */
|
|
uint16_t bitRevLength; /**< bit reversal table length. */
|
|
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
|
const uint32_t *rearranged_twiddle_tab_stride1_arr; /**< Per stage reordered twiddle pointer (offset 1) */ \
|
|
const uint32_t *rearranged_twiddle_tab_stride2_arr; /**< Per stage reordered twiddle pointer (offset 2) */ \
|
|
const uint32_t *rearranged_twiddle_tab_stride3_arr; /**< Per stage reordered twiddle pointer (offset 3) */ \
|
|
const float16_t *rearranged_twiddle_stride1; /**< reordered twiddle offset 1 storage */ \
|
|
const float16_t *rearranged_twiddle_stride2; /**< reordered twiddle offset 2 storage */ \
|
|
const float16_t *rearranged_twiddle_stride3;
|
|
#endif
|
|
} arm_cfft_instance_f16;
|
|
|
|
|
|
arm_status arm_cfft_init_f16(
|
|
arm_cfft_instance_f16 * S,
|
|
uint16_t fftLen);
|
|
|
|
void arm_cfft_f16(
|
|
const arm_cfft_instance_f16 * S,
|
|
float16_t * p1,
|
|
uint8_t ifftFlag,
|
|
uint8_t bitReverseFlag);
|
|
|
|
#endif /* ARM_FLOAT16_SUPPORTED*/
|
|
#endif /* !defined( __CC_ARM ) */
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _ARM_MATH_F16_H */
|
|
|
|
|