CMSIS-DSP: Added Helium support for functions in BasicMaths.

Added test patterns for functions in BasicMaths.
Corrected GitHub issue 622 (clarified comment for lms_norm_f32).
Added support for q63_t to the test framework.
pull/19/head
Christophe Favergeon 7 years ago
parent 2d3a2166d6
commit 56a540336a

@ -36,35 +36,6 @@ Definitions available for MVEF and MVEI
***************************************/
#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI)
/* Number of lanes of width sz (in bits) that fit in one 128-bit vector.
   The argument is parenthesized so that expressions such as nbLanes(8 * 2)
   or nbLanes(16 + 16) expand correctly. */
#define nbLanes(sz) (128/(sz))
#define VEC_LANES_F32 nbLanes(32)
#define VEC_LANES_F16 nbLanes(16)
#define VEC_LANES_Q63 nbLanes(64)
#define VEC_LANES_Q31 nbLanes(32)
#define VEC_LANES_Q15 nbLanes(16)
#define VEC_LANES_Q7 nbLanes(8)
/* Lane count matching the element type that ptr points to.
   Pointer types that are not listed select the string literal "err";
   presumably this is meant to make any arithmetic use fail to compile. */
#define nb_vec_lanes(ptr) _Generic((ptr), \
uint32_t *: VEC_LANES_Q31, \
uint16_t *: VEC_LANES_Q15, \
uint8_t *: VEC_LANES_Q7, \
q31_t *: VEC_LANES_Q31, \
q15_t *: VEC_LANES_Q15, \
q7_t *: VEC_LANES_Q7, \
float32_t*: VEC_LANES_F32, \
float16_t*: VEC_LANES_F16, \
const q31_t *: VEC_LANES_Q31, \
const q15_t *: VEC_LANES_Q15, \
const q7_t *: VEC_LANES_Q7, \
const float32_t*: VEC_LANES_F32, \
const float16_t*: VEC_LANES_F16, \
default: "err")
/* Advance ptr by one full vector worth of elements.
   The whole expansion is parenthesized so the macro behaves as a single
   expression wherever it is used. */
#define post_incr_vec_size(ptr) ((ptr) += nb_vec_lanes(ptr))
#endif /* defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI) */
/***************************************

@ -386,10 +386,22 @@ extern "C"
#define ARM_MATH_DSP 1
#endif
#if defined(__ARM_NEON)
#if defined(ARM_MATH_NEON)
#include <arm_neon.h>
#endif
#if defined (ARM_MATH_HELIUM)
#define ARM_MATH_MVEF
#endif
#if defined (ARM_MATH_MVEF)
#define ARM_MATH_MVEI
#endif
#if defined (ARM_MATH_HELIUM) || defined(ARM_MATH_MVEF) || defined(ARM_MATH_MVEI)
#include <arm_mve.h>
#endif
/**
* @brief Macros required for reciprocal calculation in Normalized LMS
@ -466,6 +478,308 @@ extern "C"
*/
typedef double float64_t;
/**
* @brief vector types
*/
#if defined(ARM_MATH_NEON) || defined (ARM_MATH_MVEI)
/*
 * Integer / fixed-point 128-bit vector aliases.
 * Available when either NEON or the MVE integer extension is enabled.
 * Naming: q<bits>x<lanes>[x<vectors>]_t.
 */
/**
 * @brief 64-bit fractional 128-bit vector data type in 1.63 format
 */
typedef int64x2_t q63x2_t;
/**
 * @brief 32-bit fractional 128-bit vector data type in 1.31 format.
 */
typedef int32x4_t q31x4_t;
/**
 * @brief 16-bit fractional 128-bit vector data type with 16-bit alignment in 1.15 format.
 */
typedef __ALIGNED(2) int16x8_t q15x8_t;
/**
 * @brief 8-bit fractional 128-bit vector data type with 8-bit alignment in 1.7 format.
 */
typedef __ALIGNED(1) int8x16_t q7x16_t;
/**
 * @brief 32-bit fractional 128-bit vector pair data type in 1.31 format.
 */
typedef int32x4x2_t q31x4x2_t;
/**
 * @brief 32-bit fractional 128-bit vector quadruplet data type in 1.31 format.
 */
typedef int32x4x4_t q31x4x4_t;
/**
 * @brief 16-bit fractional 128-bit vector pair data type in 1.15 format.
 */
typedef int16x8x2_t q15x8x2_t;
/**
 * @brief 16-bit fractional 128-bit vector quadruplet data type in 1.15 format.
 */
typedef int16x8x4_t q15x8x4_t;
/**
 * @brief 8-bit fractional 128-bit vector pair data type in 1.7 format.
 */
typedef int8x16x2_t q7x16x2_t;
/**
 * @brief 8-bit fractional 128-bit vector quadruplet data type in 1.7 format.
 */
typedef int8x16x4_t q7x16x4_t;
/**
 * @brief 32-bit fractional data type in 9.23 format.
 */
typedef int32_t q23_t;
/**
 * @brief 32-bit fractional 128-bit vector data type in 9.23 format.
 */
typedef int32x4_t q23x4_t;
/**
 * @brief 64-bit status 128-bit vector data type.
 */
typedef int64x2_t status64x2_t;
/**
 * @brief 32-bit status 128-bit vector data type.
 */
typedef int32x4_t status32x4_t;
/**
 * @brief 16-bit status 128-bit vector data type.
 */
typedef int16x8_t status16x8_t;
/**
 * @brief 8-bit status 128-bit vector data type.
 */
typedef int8x16_t status8x16_t;
#endif
#if defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF) /* floating point vector*/
/*
 * Floating-point 128-bit vector aliases.
 * Available when either NEON or the MVE floating-point extension is enabled.
 */
/**
 * @brief 32-bit floating-point 128-bit vector type
 */
typedef float32x4_t f32x4_t;
/**
 * @brief 16-bit floating-point 128-bit vector data type
 */
typedef __ALIGNED(2) float16x8_t f16x8_t;
/**
 * @brief 32-bit floating-point 128-bit vector pair data type
 */
typedef float32x4x2_t f32x4x2_t;
/**
 * @brief 32-bit floating-point 128-bit vector quadruplet data type
 */
typedef float32x4x4_t f32x4x4_t;
/**
 * @brief 16-bit floating-point 128-bit vector pair data type
 */
typedef float16x8x2_t f16x8x2_t;
/**
 * @brief 16-bit floating-point 128-bit vector quadruplet data type
 */
typedef float16x8x4_t f16x8x4_t;
/**
 * @brief 128-bit vector of four 32-bit lanes, viewable either as
 *        floating-point (member f) or as integer (member i) data
 */
typedef union _any32x4_t
{
float32x4_t f;
int32x4_t i;
} any32x4_t;
/**
 * @brief 128-bit vector of eight 16-bit lanes, viewable either as
 *        floating-point (member f) or as integer (member i) data
 */
typedef union _any16x8_t
{
float16x8_t f;
int16x8_t i;
} any16x8_t;
#endif
#if defined(ARM_MATH_NEON)
/*
 * NEON-only aliases: 64-bit vectors and vector triplets.
 *
 * Several aliases previously pointed at a base type that did not match
 * their name and description (q31x2x4_t, q15x4x3_t, q15x4x4_t and the
 * three 64-bit status types). They now alias the NEON type with the
 * matching lane width, lane count and vector count.
 */
/**
 * @brief 32-bit fractional 64-bit vector data type in 1.31 format.
 */
typedef int32x2_t q31x2_t;
/**
 * @brief 16-bit fractional 64-bit vector data type in 1.15 format.
 */
typedef __ALIGNED(2) int16x4_t q15x4_t;
/**
 * @brief 8-bit fractional 64-bit vector data type in 1.7 format.
 */
typedef __ALIGNED(1) int8x8_t q7x8_t;
/**
 * @brief 32-bit float 64-bit vector data type.
 */
typedef float32x2_t f32x2_t;
/**
 * @brief 16-bit float 64-bit vector data type.
 */
typedef __ALIGNED(2) float16x4_t f16x4_t;
/**
 * @brief 32-bit floating-point 128-bit vector triplet data type
 */
typedef float32x4x3_t f32x4x3_t;
/**
 * @brief 16-bit floating-point 128-bit vector triplet data type
 */
typedef float16x8x3_t f16x8x3_t;
/**
 * @brief 32-bit fractional 128-bit vector triplet data type in 1.31 format
 */
typedef int32x4x3_t q31x4x3_t;
/**
 * @brief 16-bit fractional 128-bit vector triplet data type in 1.15 format
 */
typedef int16x8x3_t q15x8x3_t;
/**
 * @brief 8-bit fractional 128-bit vector triplet data type in 1.7 format
 */
typedef int8x16x3_t q7x16x3_t;
/**
 * @brief 32-bit floating-point 64-bit vector pair data type
 */
typedef float32x2x2_t f32x2x2_t;
/**
 * @brief 32-bit floating-point 64-bit vector triplet data type
 */
typedef float32x2x3_t f32x2x3_t;
/**
 * @brief 32-bit floating-point 64-bit vector quadruplet data type
 */
typedef float32x2x4_t f32x2x4_t;
/**
 * @brief 16-bit floating-point 64-bit vector pair data type
 */
typedef float16x4x2_t f16x4x2_t;
/**
 * @brief 16-bit floating-point 64-bit vector triplet data type
 */
typedef float16x4x3_t f16x4x3_t;
/**
 * @brief 16-bit floating-point 64-bit vector quadruplet data type
 */
typedef float16x4x4_t f16x4x4_t;
/**
 * @brief 32-bit fractional 64-bit vector pair data type in 1.31 format
 */
typedef int32x2x2_t q31x2x2_t;
/**
 * @brief 32-bit fractional 64-bit vector triplet data type in 1.31 format
 */
typedef int32x2x3_t q31x2x3_t;
/**
 * @brief 32-bit fractional 64-bit vector quadruplet data type in 1.31 format
 */
typedef int32x2x4_t q31x2x4_t;
/**
 * @brief 16-bit fractional 64-bit vector pair data type in 1.15 format
 */
typedef int16x4x2_t q15x4x2_t;
/**
 * @brief 16-bit fractional 64-bit vector triplet data type in 1.15 format
 */
typedef int16x4x3_t q15x4x3_t;
/**
 * @brief 16-bit fractional 64-bit vector quadruplet data type in 1.15 format
 */
typedef int16x4x4_t q15x4x4_t;
/**
 * @brief 8-bit fractional 64-bit vector pair data type in 1.7 format
 */
typedef int8x8x2_t q7x8x2_t;
/**
 * @brief 8-bit fractional 64-bit vector triplet data type in 1.7 format
 */
typedef int8x8x3_t q7x8x3_t;
/**
 * @brief 8-bit fractional 64-bit vector quadruplet data type in 1.7 format
 */
typedef int8x8x4_t q7x8x4_t;
/**
 * @brief 64-bit vector of two 32-bit lanes, viewable either as
 *        floating-point (member f) or as integer (member i) data
 */
typedef union _any32x2_t
{
    float32x2_t f;
    int32x2_t i;
} any32x2_t;
/**
 * @brief 64-bit vector of four 16-bit lanes, viewable either as
 *        floating-point (member f) or as integer (member i) data
 */
typedef union _any16x4_t
{
    float16x4_t f;
    int16x4_t i;
} any16x4_t;
/**
 * @brief 32-bit status 64-bit vector data type.
 */
typedef int32x2_t status32x2_t;
/**
 * @brief 16-bit status 64-bit vector data type.
 */
typedef int16x4_t status16x4_t;
/**
 * @brief 8-bit status 64-bit vector data type.
 */
typedef int8x8_t status8x8_t;
#endif
/**
@brief definition to read/write two 16 bit values.

@ -60,6 +60,55 @@
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: element-wise absolute value, four float32 lanes per vector. */
void arm_abs_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    f32x4_t vec1;
    f32x4_t res;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        /* C = |A| */
        /* Calculate absolute values and then store the results in the destination buffer. */
        vec1 = vld1q(pSrc);
        res = vabsq(vec1);
        vst1q(pDst, res);

        /* Increment pointers */
        pSrc += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 3 remaining elements */
    blkCnt = blockSize & 0x3;

    if (blkCnt > 0U)
    {
        /* C = |A| */
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated load: lanes beyond the end of pSrc are zeroed instead
           of being read from memory past the source buffer. */
        vec1 = vld1q_z(pSrc, p0);
        vstrwq_p(pDst, vabsq(vec1), p0);
    }
}
#else
void arm_abs_f32(
const float32_t * pSrc,
float32_t * pDst,
@ -67,9 +116,9 @@ void arm_abs_f32(
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t res;
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t vec1;
f32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -95,7 +144,7 @@ void arm_abs_f32(
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -140,7 +189,7 @@ void arm_abs_f32(
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of BasicAbs group
*/

@ -49,6 +49,51 @@
The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise absolute value, eight Q15 lanes per vector. */
void arm_abs_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q15x8_t vecSrc;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        /*
         * C = |A|
         * Calculate absolute and then store the results in the destination buffer.
         */
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vqabsq(vecSrc));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrc += 8;
        pDst += 8;
    }

    /* Tail: 1 to 7 remaining elements */
    blkCnt = blockSize & 7;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated load: lanes beyond the end of pSrc are zeroed instead
           of being read from memory past the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrhq_p(pDst, vqabsq(vecSrc), p0);
    }
}
#else
void arm_abs_q15(
const q15_t * pSrc,
q15_t * pDst,
@ -126,6 +171,7 @@ void arm_abs_q15(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicAbs group

@ -49,6 +49,51 @@
The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise absolute value, four Q31 lanes per vector. */
void arm_abs_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q31x4_t vecSrc;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;

    while (blkCnt > 0U)
    {
        /*
         * C = |A|
         * Calculate absolute and then store the results in the destination buffer.
         */
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vqabsq(vecSrc));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrc += 4;
        pDst += 4;
    }

    /* Tail: 1 to 3 remaining elements */
    blkCnt = blockSize & 3;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated load: lanes beyond the end of pSrc are zeroed instead
           of being read from memory past the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrwq_p(pDst, vqabsq(vecSrc), p0);
    }
}
#else
void arm_abs_q31(
const q31_t * pSrc,
q31_t * pDst,
@ -126,7 +171,7 @@ void arm_abs_q31(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicAbs group
*/

@ -51,6 +51,51 @@
The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise absolute value, sixteen Q7 lanes per vector. */
void arm_abs_q7(
  const q7_t * pSrc,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q7x16_t vecSrc;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;

    while (blkCnt > 0U)
    {
        /*
         * C = |A|
         * Calculate absolute and then store the results in the destination buffer.
         */
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vqabsq(vecSrc));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrc += 16;
        pDst += 16;
    }

    /* Tail: 1 to 15 remaining elements */
    blkCnt = blockSize & 0xF;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated load: lanes beyond the end of pSrc are zeroed instead
           of being read from memory past the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrbq_p(pDst, vqabsq(vecSrc), p0);
    }
}
#else
void arm_abs_q7(
const q7_t * pSrc,
q7_t * pDst,
@ -128,6 +173,7 @@ void arm_abs_q7(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicAbs group

@ -58,6 +58,59 @@
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: element-wise addition, four float32 lanes per vector. */
void arm_add_f32(
  const float32_t * pSrcA,
  const float32_t * pSrcB,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    f32x4_t vec1;
    f32x4_t vec2;
    f32x4_t res;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        /* C = A + B */
        /* Add and then store the results in the destination buffer. */
        vec1 = vld1q(pSrcA);
        vec2 = vld1q(pSrcB);
        res = vaddq(vec1, vec2);
        vst1q(pDst, res);

        /* Increment pointers */
        pSrcA += 4;
        pSrcB += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 3 remaining elements */
    blkCnt = blockSize & 0x3;

    if (blkCnt > 0U)
    {
        /* C = A + B */
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vec1 = vld1q_z(pSrcA, p0);
        vec2 = vld1q_z(pSrcB, p0);
        vstrwq_p(pDst, vaddq(vec1, vec2), p0);
    }
}
#else
void arm_add_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
@ -66,10 +119,10 @@ void arm_add_f32(
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t vec1;
f32x4_t vec2;
f32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -97,7 +150,7 @@ void arm_add_f32(
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -139,6 +192,7 @@ void arm_add_f32(
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of BasicAdd group

@ -50,6 +50,56 @@
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise addition, eight Q15 lanes per vector. */
void arm_add_q15(
  const q15_t * pSrcA,
  const q15_t * pSrcB,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q15x8_t vecA;
    q15x8_t vecB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        /*
         * C = A + B
         * Add and then store the results in the destination buffer.
         */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        vst1q(pDst, vqaddq(vecA, vecB));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrcA += 8;
        pSrcB += 8;
        pDst += 8;
    }

    /* Tail: 1 to 7 remaining elements */
    blkCnt = blockSize & 7;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        vstrhq_p(pDst, vqaddq(vecA, vecB), p0);
    }
}
#else
void arm_add_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
@ -120,7 +170,7 @@ void arm_add_q15(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicAdd group
*/

@ -50,6 +50,56 @@
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise addition, four Q31 lanes per vector. */
void arm_add_q31(
  const q31_t * pSrcA,
  const q31_t * pSrcB,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q31x4_t vecA;
    q31x4_t vecB;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;

    while (blkCnt > 0U)
    {
        /*
         * C = A + B
         * Add and then store the results in the destination buffer.
         */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        vst1q(pDst, vqaddq(vecA, vecB));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrcA += 4;
        pSrcB += 4;
        pDst += 4;
    }

    /* Tail: 1 to 3 remaining elements */
    blkCnt = blockSize & 3;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        vstrwq_p(pDst, vqaddq(vecA, vecB), p0);
    }
}
#else
void arm_add_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
@ -103,6 +153,7 @@ void arm_add_q31(
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicAdd group
*/

@ -50,6 +50,55 @@
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise addition, sixteen Q7 lanes per vector. */
void arm_add_q7(
  const q7_t * pSrcA,
  const q7_t * pSrcB,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q7x16_t vecA;
    q7x16_t vecB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;

    while (blkCnt > 0U)
    {
        /*
         * C = A + B
         * Add and then store the results in the destination buffer.
         */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        vst1q(pDst, vqaddq(vecA, vecB));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrcA += 16;
        pSrcB += 16;
        pDst += 16;
    }

    /* Tail: 1 to 15 remaining elements */
    blkCnt = blockSize & 0xF;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        vstrbq_p(pDst, vqaddq(vecA, vecB), p0);
    }
}
#else
void arm_add_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
@ -103,7 +152,7 @@ void arm_add_q7(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicAdd group
*/

@ -59,42 +59,62 @@
@return none
*/
#if defined (ARM_MATH_HELIUM)
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_mve.h"
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant: dot product of two float32 vectors.
 *
 * Full groups of four elements are accumulated lane-wise with a fused
 * multiply-add; the 1 to 3 leftover elements are handled by one predicated
 * step. The four partial sums are then reduced to a scalar.
 */
void arm_dot_prod_f32(
  const float32_t * pSrcA,
  const float32_t * pSrcB,
        uint32_t blockSize,
        float32_t * result)
{
    f32x4_t vecA, vecB;
    f32x4_t vecSum;
    uint32_t blkCnt;                    /* Loop counter */
    float32_t sum = 0.0f;

    vecSum = vdupq_n_f32(0.0f);

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        /*
         * C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1]
         * Calculate dot product and then store the result in a temporary buffer,
         * and advance the vector source pointers.
         */
        vecA = vld1q(pSrcA);
        pSrcA += 4;

        vecB = vld1q(pSrcB);
        pSrcB += 4;

        vecSum = vfmaq(vecSum, vecA, vecB);

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 3 remaining elements */
    blkCnt = blockSize & 3;

    if (blkCnt > 0U)
    {
        /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        vecSum = vfmaq_m(vecSum, vecA, vecB, p0);
    }

    /* Reduce the four lane accumulators to a single value */
    sum = vecAddAcrossF32Mve(vecSum);

    /* Store result in destination buffer */
    *result = sum;
}
#else
@ -108,11 +128,11 @@ void arm_dot_prod_f32(
uint32_t blkCnt; /* Loop counter */
float32_t sum = 0.0f; /* Temporary return variable */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
float32x4_t accum = vdupq_n_f32(0);
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t vec1;
f32x4_t vec2;
f32x4_t res;
f32x4_t accum = vdupq_n_f32(0);
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -125,7 +145,7 @@ void arm_dot_prod_f32(
/* C = A[0]*B[0] + A[1]*B[1] + A[2]*B[2] + ... + A[blockSize-1]*B[blockSize-1] */
/* Calculate dot product and then store the result in a temporary buffer. */
accum = vmlaq_f32(accum, vec1, vec2);
accum = vmlaq_f32(accum, vec1, vec2);
/* Increment pointers */
pSrcA += 4;
@ -148,7 +168,7 @@ void arm_dot_prod_f32(
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -198,7 +218,7 @@ void arm_dot_prod_f32(
*result = sum;
}
#endif /* ARM_MATH_HELIUM */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of BasicDotProd group
*/

@ -52,7 +52,58 @@
there is no risk of overflow.
The return result is in 34.30 format.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: Q15 dot product accumulated into a 64-bit sum, eight lanes per vector. */
void arm_dot_prod_q15(
  const q15_t * pSrcA,
  const q15_t * pSrcB,
        uint32_t blockSize,
        q63_t * result)
{
    uint32_t blkCnt;                    /* Loop counter */
    q15x8_t vecA;
    q15x8_t vecB;
    q63_t sum = 0LL;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        /*
         * C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1]
         * Calculate dot product and then store the result in a temporary buffer.
         */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        sum = vmlaldavaq(sum, vecA, vecB);

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source pointers */
        pSrcA += 8;
        pSrcB += 8;
    }

    /* Tail: 1 to 7 remaining elements */
    blkCnt = blockSize & 7;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        sum = vmlaldavaq_p(sum, vecA, vecB, p0);
    }

    /* Store result in destination buffer */
    *result = sum;
}
#else
void arm_dot_prod_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
@ -114,6 +165,7 @@ void arm_dot_prod_q15(
/* Store result in destination buffer in 34.30 format */
*result = sum;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicDotProd group

@ -54,6 +54,64 @@
The return result is in 16.48 format.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: Q31 dot product accumulated into a 64-bit sum, four lanes per vector. */
void arm_dot_prod_q31(
  const q31_t * pSrcA,
  const q31_t * pSrcB,
        uint32_t blockSize,
        q63_t * result)
{
    uint32_t blkCnt;                    /* Loop counter */
    q31x4_t vecA;
    q31x4_t vecB;
    q63_t sum = 0LL;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;

    while (blkCnt > 0U)
    {
        /*
         * C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1]
         * Calculate dot product and then store the result in a temporary buffer.
         */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        sum = vrmlaldavhaq(sum, vecA, vecB);

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source pointers */
        pSrcA += 4;
        pSrcB += 4;
    }

    /* Tail: 1 to 3 remaining elements */
    blkCnt = blockSize & 3;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        sum = vrmlaldavhaq_p(sum, vecA, vecB, p0);
    }

    /*
     * vrmlaldavhaq provides extra intermediate accumulator headroom,
     * limiting the need for intermediate scaling.
     * The scalar variant uses a 2.48 accumulator format by right shifting
     * each product by 14. Here the remaining scaling to the 16.48 output
     * format is applied once, outside the loop, as a right shift by
     * (14 - 8) = 6.
     */
    *result = asrl(sum, (14 - 8));
}
#else
void arm_dot_prod_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
@ -109,6 +167,7 @@ void arm_dot_prod_q31(
/* Store result in destination buffer in 16.48 format */
*result = sum;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicDotProd group

@ -53,6 +53,57 @@
The return result is in 18.14 format.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: Q7 dot product accumulated into a 32-bit sum, sixteen lanes per vector. */
void arm_dot_prod_q7(
  const q7_t * pSrcA,
  const q7_t * pSrcB,
        uint32_t blockSize,
        q31_t * result)
{
    uint32_t blkCnt;                    /* Loop counter */
    q7x16_t vecA;
    q7x16_t vecB;
    q31_t sum = 0;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;

    while (blkCnt > 0U)
    {
        /*
         * C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1]
         * Calculate dot product and then store the result in a temporary buffer.
         */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        sum = vmladavaq(sum, vecA, vecB);

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source pointers */
        pSrcA += 16;
        pSrcB += 16;
    }

    /* Tail: 1 to 15 remaining elements */
    blkCnt = blockSize & 0xF;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        sum = vmladavaq_p(sum, vecA, vecB, p0);
    }

    /* Store result in destination buffer */
    *result = sum;
}
#else
void arm_dot_prod_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
@ -133,6 +184,7 @@ void arm_dot_prod_q7(
/* Store result in destination buffer in 18.14 format */
*result = sum;
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicDotProd group

@ -58,6 +58,57 @@
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: element-wise multiplication, four float32 lanes per vector. */
void arm_mult_f32(
  const float32_t * pSrcA,
  const float32_t * pSrcB,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    f32x4_t vec1;
    f32x4_t vec2;
    f32x4_t res;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        /* C = A * B */
        /* Multiply and then store the results in the destination buffer. */
        vec1 = vld1q(pSrcA);
        vec2 = vld1q(pSrcB);
        res = vmulq(vec1, vec2);
        vst1q(pDst, res);

        /* Increment pointers */
        pSrcA += 4;
        pSrcB += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 3 remaining elements */
    blkCnt = blockSize & 0x3;

    if (blkCnt > 0U)
    {
        /* C = A * B */
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vec1 = vld1q_z(pSrcA, p0);
        vec2 = vld1q_z(pSrcB, p0);
        vstrwq_p(pDst, vmulq(vec1, vec2), p0);
    }
}
#else
void arm_mult_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
@ -66,10 +117,10 @@ void arm_mult_f32(
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t vec1;
f32x4_t vec2;
f32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -97,7 +148,7 @@ void arm_mult_f32(
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -142,6 +193,7 @@ void arm_mult_f32(
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of BasicMult group

@ -49,7 +49,55 @@
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise Q15 multiplication, eight lanes per vector. */
void arm_mult_q15(
  const q15_t * pSrcA,
  const q15_t * pSrcB,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q15x8_t vecA, vecB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        /*
         * C = A * B
         * Multiply the inputs and then store the results in the destination buffer.
         */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        vst1q(pDst, vqdmulhq(vecA, vecB));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrcA += 8;
        pSrcB += 8;
        pDst += 8;
    }

    /* Tail: 1 to 7 remaining elements */
    blkCnt = blockSize & 7;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        vstrhq_p(pDst, vqdmulhq(vecA, vecB), p0);
    }
}
#else
void arm_mult_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
@ -137,6 +185,7 @@ void arm_mult_q15(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicMult group

@ -49,7 +49,55 @@
The function uses saturating arithmetic.
Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise Q31 multiplication, four lanes per vector. */
void arm_mult_q31(
  const q31_t * pSrcA,
  const q31_t * pSrcB,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q31x4_t vecA, vecB;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;

    while (blkCnt > 0U)
    {
        /*
         * C = A * B
         * Multiply the inputs and then store the results in the destination buffer.
         */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        vst1q(pDst, vqdmulhq(vecA, vecB));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrcA += 4;
        pSrcB += 4;
        pDst += 4;
    }

    /* Tail: 1 to 3 remaining elements */
    blkCnt = blockSize & 3;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        vstrwq_p(pDst, vqdmulhq(vecA, vecB), p0);
    }
}
#else
void arm_mult_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
@ -113,6 +161,7 @@ void arm_mult_q31(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicMult group

@ -49,7 +49,55 @@
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise Q7 multiplication, sixteen lanes per vector. */
void arm_mult_q7(
  const q7_t * pSrcA,
  const q7_t * pSrcB,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q7x16_t vecA, vecB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;

    while (blkCnt > 0U)
    {
        /*
         * C = A * B
         * Multiply the inputs and then store the results in the destination buffer.
         */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        vst1q(pDst, vqdmulhq(vecA, vecB));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrcA += 16;
        pSrcB += 16;
        pDst += 16;
    }

    /* Tail: 1 to 15 remaining elements */
    blkCnt = blockSize & 0xF;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated loads: lanes beyond the end of the inputs are zeroed
           instead of being read from memory past the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        vstrbq_p(pDst, vqdmulhq(vecA, vecB), p0);
    }
}
#else
void arm_mult_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
@ -113,6 +161,7 @@ void arm_mult_q7(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicMult group

@ -59,6 +59,52 @@
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: element-wise negation, four float32 lanes per vector. */
void arm_negate_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    f32x4_t vec1;
    f32x4_t res;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        /* C = -A */
        /* Negate and then store the results in the destination buffer. */
        vec1 = vld1q(pSrc);
        res = vnegq(vec1);
        vst1q(pDst, res);

        /* Increment pointers */
        pSrc += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 3 remaining elements */
    blkCnt = blockSize & 0x3;

    if (blkCnt > 0U)
    {
        /* C = -A */
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated load: lanes beyond the end of pSrc are zeroed instead
           of being read from memory past the source buffer. */
        vec1 = vld1q_z(pSrc, p0);
        vstrwq_p(pDst, vnegq(vec1), p0);
    }
}
#else
void arm_negate_f32(
const float32_t * pSrc,
float32_t * pDst,
@ -66,9 +112,9 @@ void arm_negate_f32(
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
float32x4_t vec1;
float32x4_t res;
#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t vec1;
f32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -94,7 +140,7 @@ void arm_negate_f32(
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -139,6 +185,7 @@ void arm_negate_f32(
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of BasicNegate group

@ -50,7 +50,51 @@
The function uses saturating arithmetic.
The Q15 value -1 (0x8000) is saturated to the maximum allowable positive value 0x7FFF.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise negation, eight Q15 lanes per vector. */
void arm_negate_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q15x8_t vecSrc;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        /*
         * C = -A
         * Negate and then store the results in the destination buffer.
         */
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vqnegq(vecSrc));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrc += 8;
        pDst += 8;
    }

    /* Tail: 1 to 7 remaining elements */
    blkCnt = blockSize & 7;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated load: lanes beyond the end of pSrc are zeroed instead
           of being read from memory past the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrhq_p(pDst, vqnegq(vecSrc), p0);
    }
}
#else
void arm_negate_q15(
const q15_t * pSrc,
q15_t * pDst,
@ -120,6 +164,7 @@ void arm_negate_q15(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicNegate group

@ -49,6 +49,51 @@
The Q31 value -1 (0x80000000) is saturated to the maximum allowable positive value 0x7FFFFFFF.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/* Helium (MVE) variant: saturating element-wise negation, four Q31 lanes per vector. */
void arm_negate_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;                    /* Loop counter */
    q31x4_t vecSrc;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;

    while (blkCnt > 0U)
    {
        /*
         * C = -A
         * Negate and then store the results in the destination buffer.
         */
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vqnegq(vecSrc));

        /* Decrement the loop counter */
        blkCnt--;

        /* Advance vector source and destination pointers */
        pSrc += 4;
        pDst += 4;
    }

    /* Tail: 1 to 3 remaining elements */
    blkCnt = blockSize & 3;

    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated load: lanes beyond the end of pSrc are zeroed instead
           of being read from memory past the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrwq_p(pDst, vqnegq(vecSrc), p0);
    }
}
#else
void arm_negate_q31(
const q31_t * pSrc,
q31_t * pDst,
@ -126,6 +171,7 @@ void arm_negate_q31(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicNegate group

@ -48,7 +48,51 @@
The function uses saturating arithmetic.
The Q7 value -1 (0x80) is saturated to the maximum allowable positive value 0x7F.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_negate_q7: pDst[n] = -pSrc[n], using the
 * saturating negate intrinsic.
 *
 *   pSrc       input vector
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 16-lane vectors are handled in the main loop; the remaining
 * (blockSize & 0xF) samples are handled once with a tail predicate.
 */
void arm_negate_q7(
  const q7_t * pSrc,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q7x16_t  vecSrc;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0U)
    {
        /* C = -A: negate and store the result in the destination buffer */
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vqnegq(vecSrc));

        blkCnt--;

        /* Advance source and destination by one vector */
        pSrc += 16;
        pDst += 16;
    }

    /* Tail: 1 to 15 remaining samples */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrbq_p(pDst, vqnegq(vecSrc), p0);
    }
}
#else
void arm_negate_q7(
const q7_t * pSrc,
q7_t * pDst,
@ -110,7 +154,7 @@ void arm_negate_q7(
in = *pSrc++;
#if defined (ARM_MATH_DSP)
*pDst++ = (q7_t) __QSUB(0, in);
*pDst++ = (q7_t) __QSUB8(0, in);
#else
*pDst++ = (in == (q7_t) 0x80) ? (q7_t) 0x7f : -in;
#endif
@ -120,6 +164,7 @@ void arm_negate_q7(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicNegate group

@ -60,6 +60,54 @@
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_offset_f32: pDst[n] = pSrc[n] + offset.
 *
 *   pSrc       input vector
 *   offset     value added to every sample
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 4-lane vectors are handled in the main loop; the remaining
 * (blockSize & 3) samples are handled once with a tail predicate.
 */
void arm_offset_f32(
  const float32_t * pSrc,
        float32_t offset,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* Loop counter */
    f32x4_t  vec1;
    f32x4_t  res;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        /* C = A + offset: add the offset and store the result */
        vec1 = vld1q(pSrc);
        res = vaddq(vec1, offset);
        vst1q(pDst, res);

        /* Increment pointers */
        pSrc += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 3 remaining samples */
    blkCnt = blockSize & 0x3;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vec1 = vld1q_z(pSrc, p0);
        vstrwq_p(pDst, vaddq(vec1, offset), p0);
    }
}
#else
void arm_offset_f32(
const float32_t * pSrc,
float32_t offset,
@ -68,9 +116,9 @@ void arm_offset_f32(
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
float32x4_t vec1;
float32x4_t res;
#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t vec1;
f32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -96,7 +144,7 @@ void arm_offset_f32(
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -141,6 +189,7 @@ void arm_offset_f32(
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of BasicOffset group

@ -49,7 +49,53 @@
The function uses saturating arithmetic.
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_offset_q15: pDst[n] = pSrc[n] + offset, using
 * the saturating add intrinsic.
 *
 *   pSrc       input vector
 *   offset     value added to every sample
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 8-lane vectors are handled in the main loop; the remaining
 * (blockSize & 7) samples are handled once with a tail predicate.
 */
void arm_offset_q15(
  const q15_t * pSrc,
        q15_t offset,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q15x8_t  vecSrc;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        /* C = A + offset: add the offset and store the result */
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vqaddq(vecSrc, offset));

        blkCnt--;

        /* Advance source and destination by one vector */
        pSrc += 8;
        pDst += 8;
    }

    /* Tail: 1 to 7 remaining samples */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrhq_p(pDst, vqaddq(vecSrc, offset), p0);
    }
}
#else
void arm_offset_q15(
const q15_t * pSrc,
q15_t offset,
@ -115,6 +161,7 @@ void arm_offset_q15(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicOffset group

@ -50,6 +50,52 @@
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_offset_q31: pDst[n] = pSrc[n] + offset, using
 * the saturating add intrinsic.
 *
 *   pSrc       input vector
 *   offset     value added to every sample
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 4-lane vectors are handled in the main loop; the remaining
 * (blockSize & 3) samples are handled once with a tail predicate.
 */
void arm_offset_q31(
  const q31_t * pSrc,
        q31_t offset,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q31x4_t  vecSrc;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0U)
    {
        /* C = A + offset: add the offset and store the result */
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vqaddq(vecSrc, offset));

        blkCnt--;

        /* Advance source and destination by one vector */
        pSrc += 4;
        pDst += 4;
    }

    /* Tail: 1 to 3 remaining samples */
    blkCnt = blockSize & 3;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrwq_p(pDst, vqaddq(vecSrc, offset), p0);
    }
}
#else
void arm_offset_q31(
const q31_t * pSrc,
q31_t offset,
@ -122,6 +168,7 @@ void arm_offset_q31(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicOffset group

@ -49,7 +49,52 @@
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_offset_q7: pDst[n] = pSrc[n] + offset, using
 * the saturating add intrinsic.
 *
 *   pSrc       input vector
 *   offset     value added to every sample
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 16-lane vectors are handled in the main loop; the remaining
 * (blockSize & 0xF) samples are handled once with a tail predicate.
 */
void arm_offset_q7(
  const q7_t * pSrc,
        q7_t offset,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q7x16_t  vecSrc;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0U)
    {
        /* C = A + offset: add the offset and store the result */
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vqaddq(vecSrc, offset));

        blkCnt--;

        /* Advance source and destination by one vector */
        pSrc += 16;
        pDst += 16;
    }

    /* Tail: 1 to 15 remaining samples */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vstrbq_p(pDst, vqaddq(vecSrc, offset), p0);
    }
}
#else
void arm_offset_q7(
const q7_t * pSrc,
q7_t offset,
@ -110,6 +155,7 @@ void arm_offset_q7(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicOffset group

@ -73,6 +73,55 @@
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_scale_f32: pDst[n] = pSrc[n] * scale.
 *
 *   pSrc       input vector
 *   scale      factor every sample is multiplied by
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 4-lane vectors are handled in the main loop; the remaining
 * (blockSize & 3) samples are handled once with a tail predicate.
 */
void arm_scale_f32(
  const float32_t * pSrc,
        float32_t scale,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* Loop counter */
    f32x4_t  vec1;
    f32x4_t  res;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        /* C = A * scale: scale the input and store the result */
        vec1 = vld1q(pSrc);
        res = vmulq(vec1, scale);
        vst1q(pDst, res);

        /* Increment pointers */
        pSrc += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 3 remaining samples */
    blkCnt = blockSize & 0x3;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vec1 = vld1q_z(pSrc, p0);
        vstrwq_p(pDst, vmulq(vec1, scale), p0);
    }
}
#else
void arm_scale_f32(
const float32_t *pSrc,
float32_t scale,
@ -81,8 +130,8 @@ void arm_scale_f32(
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
float32x4_t vec1;
float32x4_t res;
f32x4_t vec1;
f32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -153,6 +202,7 @@ void arm_scale_f32(
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of BasicScale group

@ -51,6 +51,62 @@
These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_scale_q15: each sample is multiplied by
 * scaleFract and the result is shifted by 'shift' with saturation.
 *
 *   pSrc        input vector
 *   scaleFract  fractional portion of the scale value
 *   shift       number of bits to shift the result by
 *   pDst        output vector
 *   blockSize   number of samples to process
 *
 * vmulhq keeps the upper half of each product; vqshlq_r then applies a
 * saturating shift of (shift + 1) bits.
 * Full 8-lane vectors are handled in the main loop; the remaining
 * (blockSize & 7) samples are handled once with a tail predicate.
 */
void arm_scale_q15(
  const q15_t * pSrc,
        q15_t scaleFract,
        int8_t shift,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q15x8_t  vecSrc;
    q15x8_t  vecDst;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        /* C = A * scale: scale the input and store the result */
        vecSrc = vld1q(pSrc);
        vecDst = vmulhq(vecSrc, vdupq_n_s16(scaleFract));
        vecDst = vqshlq_r(vecDst, shift + 1);
        vst1q(pDst, vecDst);

        blkCnt--;

        /* Advance source and destination by one vector */
        pSrc += 8;
        pDst += 8;
    }

    /* Tail: 1 to 7 remaining samples */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vecDst = vmulhq(vecSrc, vdupq_n_s16(scaleFract));
        vecDst = vqshlq_r(vecDst, shift + 1);
        vstrhq_p(pDst, vecDst, p0);
    }
}
#else
void arm_scale_q15(
const q15_t *pSrc,
q15_t scaleFract,
@ -138,6 +194,7 @@ void arm_scale_q15(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicScale group

@ -51,6 +51,58 @@
These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_scale_q31: each sample is multiplied by
 * scaleFract and the result is shifted by 'shift' with saturation.
 *
 *   pSrc        input vector
 *   scaleFract  fractional portion of the scale value
 *   shift       number of bits to shift the result by
 *   pDst        output vector
 *   blockSize   number of samples to process
 *
 * vmulhq keeps the upper half of each product; vqshlq_r then applies a
 * saturating shift of (shift + 1) bits.
 * Full 4-lane vectors are handled in the main loop; the remaining
 * (blockSize & 3) samples are handled once with a tail predicate.
 */
void arm_scale_q31(
  const q31_t * pSrc,
        q31_t scaleFract,
        int8_t shift,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q31x4_t  vecSrc;
    q31x4_t  vecDst;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0U)
    {
        /* C = A * scale: scale the input and store the result */
        vecSrc = vld1q(pSrc);
        vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
        vecDst = vqshlq_r(vecDst, shift + 1);
        vst1q(pDst, vecDst);

        blkCnt--;

        /* Advance source and destination by one vector */
        pSrc += 4;
        pDst += 4;
    }

    /* Tail: 1 to 3 remaining samples */
    blkCnt = blockSize & 3;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
        vecDst = vqshlq_r(vecDst, shift + 1);
        vstrwq_p(pDst, vecDst, p0);
    }
}
#else
void arm_scale_q31(
const q31_t *pSrc,
q31_t scaleFract,
@ -185,6 +237,7 @@ void arm_scale_q31(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicScale group

@ -51,6 +51,62 @@
These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_scale_q7: each sample is multiplied by
 * scaleFract and the result is shifted by 'shift' with saturation.
 *
 *   pSrc        input vector
 *   scaleFract  fractional portion of the scale value
 *   shift       number of bits to shift the result by
 *   pDst        output vector
 *   blockSize   number of samples to process
 *
 * vmulhq keeps the upper half of each product; vqshlq_r then applies a
 * saturating shift of (shift + 1) bits.
 * Full 16-lane vectors are handled in the main loop; the remaining
 * (blockSize & 0xF) samples are handled once with a tail predicate.
 */
void arm_scale_q7(
  const q7_t * pSrc,
        q7_t scaleFract,
        int8_t shift,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q7x16_t  vecSrc;
    q7x16_t  vecDst;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0U)
    {
        /* C = A * scale: scale the input and store the result */
        vecSrc = vld1q(pSrc);
        vecDst = vmulhq(vecSrc, vdupq_n_s8(scaleFract));
        vecDst = vqshlq_r(vecDst, shift + 1);
        vst1q(pDst, vecDst);

        blkCnt--;

        /* Advance source and destination by one vector */
        pSrc += 16;
        pDst += 16;
    }

    /* Tail: 1 to 15 remaining samples */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vecDst = vmulhq(vecSrc, vdupq_n_s8(scaleFract));
        vecDst = vqshlq_r(vecDst, shift + 1);
        vstrbq_p(pDst, vecDst, p0);
    }
}
#else
void arm_scale_q7(
const q7_t * pSrc,
q7_t scaleFract,
@ -123,6 +179,7 @@ void arm_scale_q7(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicScale group

@ -50,6 +50,55 @@
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_shift_q15: every sample is shifted by
 * shiftBits using the saturating shift-by-register intrinsic.
 *
 *   pSrc       input vector
 *   shiftBits  number of bits to shift by (C = A (>> or <<) shiftBits)
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 8-lane vectors are handled in the main loop; the remaining
 * (blockSize & 7) samples are handled once with a tail predicate.
 */
void arm_shift_q15(
  const q15_t * pSrc,
        int8_t shiftBits,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q15x8_t  vecSrc;
    q15x8_t  vecDst;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        /* C = A (>> or <<) shiftBits: shift the input and store the result */
        vecSrc = vld1q(pSrc);
        vecDst = vqshlq_r(vecSrc, shiftBits);
        vst1q(pDst, vecDst);

        blkCnt--;

        /* Advance source and destination by one vector */
        pSrc += 8;
        pDst += 8;
    }

    /* Tail: 1 to 7 remaining samples */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vecDst = vqshlq_r(vecSrc, shiftBits);
        vstrhq_p(pDst, vecDst, p0);
    }
}
#else
void arm_shift_q15(
const q15_t * pSrc,
int8_t shiftBits,
@ -195,6 +244,7 @@ void arm_shift_q15(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicShift group

@ -67,6 +67,56 @@
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_shift_q31: every sample is shifted by
 * shiftBits using the saturating shift-by-register intrinsic.
 *
 *   pSrc       input vector
 *   shiftBits  number of bits to shift by (C = A (>> or <<) shiftBits)
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 4-lane vectors are handled in the main loop; the remaining
 * (blockSize & 3) samples are handled once with a tail predicate.
 */
void arm_shift_q31(
  const q31_t * pSrc,
        int8_t shiftBits,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q31x4_t  vecSrc;
    q31x4_t  vecDst;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0U)
    {
        /* C = A (>> or <<) shiftBits: shift the input and store the result */
        vecSrc = vld1q((q31_t const *) pSrc);
        vecDst = vqshlq_r(vecSrc, shiftBits);
        vst1q(pDst, vecDst);

        blkCnt--;

        /* Advance source and destination by one vector */
        pSrc += 4;
        pDst += 4;
    }

    /* Tail: 1 to 3 remaining samples */
    blkCnt = blockSize & 3;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vecSrc = vld1q_z((q31_t const *) pSrc, p0);
        vecDst = vqshlq_r(vecSrc, shiftBits);
        vstrwq_p(pDst, vecDst, p0);
    }
}
#else
void arm_shift_q31(
const q31_t * pSrc,
int8_t shiftBits,
@ -175,6 +225,7 @@ void arm_shift_q31(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicShift group

@ -52,6 +52,55 @@
Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_shift_q7: every sample is shifted by
 * shiftBits using the saturating shift-by-register intrinsic.
 *
 *   pSrc       input vector
 *   shiftBits  number of bits to shift by (C = A (>> or <<) shiftBits)
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 16-lane vectors are handled in the main loop; the remaining
 * (blockSize & 0xF) samples are handled once with a tail predicate.
 */
void arm_shift_q7(
  const q7_t * pSrc,
        int8_t shiftBits,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q7x16_t  vecSrc;
    q7x16_t  vecDst;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0U)
    {
        /* C = A (>> or <<) shiftBits: shift the input and store the result */
        vecSrc = vld1q(pSrc);
        vecDst = vqshlq_r(vecSrc, shiftBits);
        vst1q(pDst, vecDst);

        blkCnt--;

        /* Advance source and destination by one vector */
        pSrc += 16;
        pDst += 16;
    }

    /* Tail: 1 to 15 remaining samples */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated load: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffer. */
        vecSrc = vld1q_z(pSrc, p0);
        vecDst = vqshlq_r(vecSrc, shiftBits);
        vstrbq_p(pDst, vecDst, p0);
    }
}
#else
void arm_shift_q7(
const q7_t * pSrc,
int8_t shiftBits,
@ -169,6 +218,7 @@ void arm_shift_q7(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicShift group

@ -58,6 +58,59 @@
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_sub_f32: pDst[n] = pSrcA[n] - pSrcB[n].
 *
 *   pSrcA      first input vector
 *   pSrcB      second input vector (subtracted from the first)
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 4-lane vectors are handled in the main loop; the remaining
 * (blockSize & 3) samples are handled once with a tail predicate.
 */
void arm_sub_f32(
  const float32_t * pSrcA,
  const float32_t * pSrcB,
        float32_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* Loop counter */
    f32x4_t  vec1;
    f32x4_t  vec2;
    f32x4_t  res;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        /* C = A - B: subtract and store the result */
        vec1 = vld1q(pSrcA);
        vec2 = vld1q(pSrcB);
        res = vsubq(vec1, vec2);
        vst1q(pDst, res);

        /* Increment pointers */
        pSrcA += 4;
        pSrcB += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 3 remaining samples */
    blkCnt = blockSize & 0x3;
    if (blkCnt > 0U)
    {
        /* C = A - B */
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated loads: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffers. */
        vec1 = vld1q_z(pSrcA, p0);
        vec2 = vld1q_z(pSrcB, p0);
        vstrwq_p(pDst, vsubq(vec1, vec2), p0);
    }
}
#else
void arm_sub_f32(
const float32_t * pSrcA,
const float32_t * pSrcB,
@ -66,10 +119,10 @@ void arm_sub_f32(
{
uint32_t blkCnt; /* Loop counter */
#if defined(ARM_MATH_NEON)
float32x4_t vec1;
float32x4_t vec2;
float32x4_t res;
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t vec1;
f32x4_t vec2;
f32x4_t res;
/* Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -97,7 +150,7 @@ void arm_sub_f32(
blkCnt = blockSize & 0x3;
#else
#if defined (ARM_MATH_LOOPUNROLL)
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
@ -142,6 +195,7 @@ void arm_sub_f32(
}
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of BasicSub group

@ -50,6 +50,57 @@
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_sub_q15: pDst[n] = pSrcA[n] - pSrcB[n], using
 * the saturating subtract intrinsic.
 *
 *   pSrcA      first input vector
 *   pSrcB      second input vector (subtracted from the first)
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 8-lane vectors are handled in the main loop; the remaining
 * (blockSize & 7) samples are handled once with a tail predicate.
 */
void arm_sub_q15(
  const q15_t * pSrcA,
  const q15_t * pSrcB,
        q15_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q15x8_t  vecA;
    q15x8_t  vecB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        /* C = A - B: subtract and store the result */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        vst1q(pDst, vqsubq(vecA, vecB));

        blkCnt--;

        /* Advance sources and destination by one vector */
        pSrcA += 8;
        pSrcB += 8;
        pDst += 8;
    }

    /* Tail: 1 to 7 remaining samples */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Predicated loads: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        vstrhq_p(pDst, vqsubq(vecA, vecB), p0);
    }
}
#else
void arm_sub_q15(
const q15_t * pSrcA,
const q15_t * pSrcB,
@ -120,6 +171,7 @@ void arm_sub_q15(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicSub group

@ -50,6 +50,56 @@
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_sub_q31: pDst[n] = pSrcA[n] - pSrcB[n], using
 * the saturating subtract intrinsic.
 *
 *   pSrcA      first input vector
 *   pSrcB      second input vector (subtracted from the first)
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 4-lane vectors are handled in the main loop; the remaining
 * (blockSize & 3) samples are handled once with a tail predicate.
 */
void arm_sub_q31(
  const q31_t * pSrcA,
  const q31_t * pSrcB,
        q31_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q31x4_t  vecA;
    q31x4_t  vecB;

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0U)
    {
        /* C = A - B: subtract and store the result */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        vst1q(pDst, vqsubq(vecA, vecB));

        blkCnt--;

        /* Advance sources and destination by one vector */
        pSrcA += 4;
        pSrcB += 4;
        pDst += 4;
    }

    /* Tail: 1 to 3 remaining samples */
    blkCnt = blockSize & 3;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp32q(blkCnt);

        /* Predicated loads: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        vstrwq_p(pDst, vqsubq(vecA, vecB), p0);
    }
}
#else
void arm_sub_q31(
const q31_t * pSrcA,
const q31_t * pSrcB,
@ -102,6 +152,7 @@ void arm_sub_q31(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicSub group

@ -49,7 +49,55 @@
The function uses saturating arithmetic.
Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
*/
#if defined(ARM_MATH_MVEI)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant of arm_sub_q7: pDst[n] = pSrcA[n] - pSrcB[n], using
 * the saturating subtract intrinsic.
 *
 *   pSrcA      first input vector
 *   pSrcB      second input vector (subtracted from the first)
 *   pDst       output vector
 *   blockSize  number of samples to process
 *
 * Full 16-lane vectors are handled in the main loop; the remaining
 * (blockSize & 0xF) samples are handled once with a tail predicate.
 */
void arm_sub_q7(
  const q7_t * pSrcA,
  const q7_t * pSrcB,
        q7_t * pDst,
        uint32_t blockSize)
{
    uint32_t blkCnt;   /* loop counter */
    q7x16_t  vecA;
    q7x16_t  vecB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0U)
    {
        /* C = A - B: subtract and store the result */
        vecA = vld1q(pSrcA);
        vecB = vld1q(pSrcB);
        vst1q(pDst, vqsubq(vecA, vecB));

        blkCnt--;

        /* Advance sources and destination by one vector */
        pSrcA += 16;
        pSrcB += 16;
        pDst += 16;
    }

    /* Tail: 1 to 15 remaining samples */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp8q(blkCnt);

        /* Predicated loads: inactive lanes are zeroed instead of being read
           from memory located past the end of the source buffers. */
        vecA = vld1q_z(pSrcA, p0);
        vecB = vld1q_z(pSrcB, p0);
        vstrbq_p(pDst, vqsubq(vecA, vecB), p0);
    }
}
#else
void arm_sub_q7(
const q7_t * pSrcA,
const q7_t * pSrcB,
@ -103,6 +151,7 @@ void arm_sub_q7(
}
}
#endif /* defined(ARM_MATH_MVEI) */
/**
@} end of BasicSub group

@ -126,7 +126,7 @@
- Scaling of coefficients
- Overflow and saturation
@par Scaling of Coefficients
@par Scaling of Coefficients (fixed point versions)
Filter coefficients are represented as fractional values and
coefficients are restricted to lie in the range <code>[-1 +1)</code>.
The fixed-point functions have an additional scaling parameter <code>postShift</code>.
@ -135,7 +135,7 @@
allows the filter coefficients to exceed the range <code>[-1 +1)</code>.
The value of <code>postShift</code> is set by the user based on the expected gain through the system being modeled.
@par Overflow and Saturation
@par Overflow and Saturation (fixed point versions)
Overflow and saturation behavior of the fixed-point Q15 and Q31 versions are
described separately as part of the function specific documentation below.
*/

@ -139,6 +139,9 @@ else()
set(TESTSRC
Source/Tests/BasicTestsF32.cpp
Source/Tests/BasicTestsQ31.cpp
Source/Tests/BasicTestsQ15.cpp
Source/Tests/BasicTestsQ7.cpp
Source/Tests/SVMF32.cpp
Source/Tests/BayesF32.cpp
Source/Tests/TransformF32.cpp

@ -59,6 +59,7 @@ FPGA driver. Used to read a C array describing how to drive the test.
virtual void ImportPattern_f64(Testing::PatternID_t,char*,Testing::nbSamples_t nb);
virtual void ImportPattern_f32(Testing::PatternID_t,char*,Testing::nbSamples_t nb);
virtual void ImportPattern_q63(Testing::PatternID_t,char*,Testing::nbSamples_t nb);
virtual void ImportPattern_q31(Testing::PatternID_t,char*,Testing::nbSamples_t nb);
virtual void ImportPattern_q15(Testing::PatternID_t,char*,Testing::nbSamples_t nb);
virtual void ImportPattern_q7(Testing::PatternID_t,char*,Testing::nbSamples_t nb);
@ -73,6 +74,7 @@ FPGA driver. Used to read a C array describing how to drive the test.
virtual void DumpPattern_f64(Testing::outputID_t,Testing::nbSamples_t nb, float64_t* data);
virtual void DumpPattern_f32(Testing::outputID_t,Testing::nbSamples_t nb, float32_t* data);
virtual void DumpPattern_q63(Testing::outputID_t,Testing::nbSamples_t nb, q63_t* data);
virtual void DumpPattern_q31(Testing::outputID_t,Testing::nbSamples_t nb, q31_t* data);
virtual void DumpPattern_q15(Testing::outputID_t,Testing::nbSamples_t nb, q15_t* data);
virtual void DumpPattern_q7(Testing::outputID_t,Testing::nbSamples_t nb, q7_t* data);

@ -45,6 +45,9 @@ float64_t *loadPattern(Testing::PatternID_t id, PatternMgr *mgr,Testing::nbSampl
template <>
float32_t *loadPattern(Testing::PatternID_t id, PatternMgr *mgr,Testing::nbSamples_t &nb, Testing::nbSamples_t maxSamples);
template <>
q63_t *loadPattern(Testing::PatternID_t id, PatternMgr *mgr,Testing::nbSamples_t &nb, Testing::nbSamples_t maxSamples);
template <>
q31_t *loadPattern(Testing::PatternID_t id, PatternMgr *mgr,Testing::nbSamples_t &nb, Testing::nbSamples_t maxSamples);
@ -75,6 +78,9 @@ float64_t *localPattern(Testing::nbSamples_t nb, PatternMgr *mgr);
template <>
float32_t *localPattern(Testing::nbSamples_t nb, PatternMgr *mgr);
template <>
q63_t *localPattern(Testing::nbSamples_t nb, PatternMgr *mgr);
template <>
q31_t *localPattern(Testing::nbSamples_t nb, PatternMgr *mgr);
@ -95,6 +101,7 @@ uint8_t *localPattern(Testing::nbSamples_t nb, PatternMgr *mgr);
extern void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t nb,float64_t* data,PatternMgr *mgr);
extern void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t,float32_t*,PatternMgr *);
extern void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t,q63_t*,PatternMgr *);
extern void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t,q31_t*,PatternMgr *);
extern void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t,q15_t*,PatternMgr *);
extern void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t,q7_t*,PatternMgr *);

@ -63,6 +63,7 @@ Semihosting driver. Used to read a text file describing how to drive the test.
virtual void ImportPattern_f64(Testing::PatternID_t,char*,Testing::nbSamples_t nb=0);
virtual void ImportPattern_f32(Testing::PatternID_t,char*,Testing::nbSamples_t nb=0);
virtual void ImportPattern_q63(Testing::PatternID_t,char*,Testing::nbSamples_t nb=0);
virtual void ImportPattern_q31(Testing::PatternID_t,char*,Testing::nbSamples_t nb=0);
virtual void ImportPattern_q15(Testing::PatternID_t,char*,Testing::nbSamples_t nb=0);
virtual void ImportPattern_q7(Testing::PatternID_t,char*,Testing::nbSamples_t nb=0);
@ -78,6 +79,7 @@ Semihosting driver. Used to read a text file describing how to drive the test.
virtual void DumpPattern_f64(Testing::outputID_t,Testing::nbSamples_t nb, float64_t*);
virtual void DumpPattern_f32(Testing::outputID_t,Testing::nbSamples_t nb, float32_t*);
virtual void DumpPattern_q63(Testing::outputID_t,Testing::nbSamples_t nb, q63_t*);
virtual void DumpPattern_q31(Testing::outputID_t,Testing::nbSamples_t nb, q31_t*);
virtual void DumpPattern_q15(Testing::outputID_t,Testing::nbSamples_t nb, q15_t*);
virtual void DumpPattern_q7(Testing::outputID_t,Testing::nbSamples_t nb, q7_t*);

@ -280,6 +280,7 @@ API of Memory managers used in the test framework
*/
virtual void ImportPattern_f64(Testing::PatternID_t,char*,Testing::nbSamples_t nb=MAX_NB_SAMPLES)=0;
virtual void ImportPattern_f32(Testing::PatternID_t,char*,Testing::nbSamples_t nb=MAX_NB_SAMPLES)=0;
virtual void ImportPattern_q63(Testing::PatternID_t,char*,Testing::nbSamples_t nb=MAX_NB_SAMPLES)=0;
virtual void ImportPattern_q31(Testing::PatternID_t,char*,Testing::nbSamples_t nb=MAX_NB_SAMPLES)=0;
virtual void ImportPattern_q15(Testing::PatternID_t,char*,Testing::nbSamples_t nb=MAX_NB_SAMPLES)=0;
virtual void ImportPattern_q7(Testing::PatternID_t,char*,Testing::nbSamples_t nb=MAX_NB_SAMPLES)=0;
@ -308,6 +309,7 @@ API of Memory managers used in the test framework
*/
virtual void DumpPattern_f64(Testing::outputID_t,Testing::nbSamples_t nb, float64_t*)=0;
virtual void DumpPattern_f32(Testing::outputID_t,Testing::nbSamples_t nb, float32_t*)=0;
virtual void DumpPattern_q63(Testing::outputID_t,Testing::nbSamples_t nb, q63_t*)=0;
virtual void DumpPattern_q31(Testing::outputID_t,Testing::nbSamples_t nb, q31_t*)=0;
virtual void DumpPattern_q15(Testing::outputID_t,Testing::nbSamples_t nb, q15_t*)=0;
virtual void DumpPattern_q7(Testing::outputID_t,Testing::nbSamples_t nb, q7_t*)=0;
@ -386,6 +388,7 @@ public:
*/
float64_t *load_f64(Testing::PatternID_t,Testing::nbSamples_t&,Testing::nbSamples_t maxSamples=MAX_NB_SAMPLES);
float32_t *load_f32(Testing::PatternID_t,Testing::nbSamples_t&,Testing::nbSamples_t maxSamples=MAX_NB_SAMPLES);
q63_t *load_q63(Testing::PatternID_t,Testing::nbSamples_t&,Testing::nbSamples_t maxSamples=MAX_NB_SAMPLES);
q31_t *load_q31(Testing::PatternID_t,Testing::nbSamples_t&,Testing::nbSamples_t maxSamples=MAX_NB_SAMPLES);
q15_t *load_q15(Testing::PatternID_t,Testing::nbSamples_t&,Testing::nbSamples_t maxSamples=MAX_NB_SAMPLES);
q7_t *load_q7(Testing::PatternID_t,Testing::nbSamples_t&,Testing::nbSamples_t maxSamples=MAX_NB_SAMPLES);
@ -402,6 +405,7 @@ public:
*/
float64_t *local_f64(Testing::nbSamples_t);
float32_t *local_f32(Testing::nbSamples_t);
q63_t *local_q63(Testing::nbSamples_t);
q31_t *local_q31(Testing::nbSamples_t);
q15_t *local_q15(Testing::nbSamples_t);
q7_t *local_q7(Testing::nbSamples_t);
@ -416,6 +420,7 @@ public:
void dumpPattern_f64(Testing::outputID_t,Testing::nbSamples_t,float64_t*);
void dumpPattern_f32(Testing::outputID_t,Testing::nbSamples_t,float32_t*);
void dumpPattern_q63(Testing::outputID_t,Testing::nbSamples_t,q63_t*);
void dumpPattern_q31(Testing::outputID_t,Testing::nbSamples_t,q31_t*);
void dumpPattern_q15(Testing::outputID_t,Testing::nbSamples_t,q15_t*);
void dumpPattern_q7(Testing::outputID_t,Testing::nbSamples_t,q7_t*);

@ -187,8 +187,8 @@ float arm_snr_q15(q15_t *pRef, q15_t *pTest, uint32_t buffSize)
{
float EnergySignal = 0.0, EnergyError = 0.0;
uint32_t i;
float SNR;
float SNR;
float32_t testVal,refVal;
for (i = 0; i < buffSize; i++)
@ -331,6 +331,7 @@ void assert_snr_error(unsigned long nb,AnyPattern<q15_t> &pa,AnyPattern<q15_t> &
snr = arm_snr_q15(ptrA, ptrB, pa.nbSamples());
//printf("SNR = %f\n",snr);
if (snr < threshold)
{
@ -353,6 +354,7 @@ void assert_snr_error(unsigned long nb,AnyPattern<q7_t> &pa,AnyPattern<q7_t> &pb
snr = arm_snr_q7(ptrA, ptrB, pa.nbSamples());
//printf("SNR = %f\n",snr);
if (snr < threshold)
{

@ -96,6 +96,10 @@ namespace Client
delete(this->outputNames);
}
/** Read word 64 from C array
*/
/** Read word 32 from C array
*/
@ -531,6 +535,25 @@ namespace Client
}
void FPGA::ImportPattern_q63(Testing::PatternID_t id,char* p,Testing::nbSamples_t nb)
{
    /* Copy nb q63 samples of pattern `id` from the in-memory pattern array
       (m_patterns) into the caller's buffer p. Nothing is copied when p is
       a null pointer. */
    const unsigned long patternOffset = this->getPatternOffset(id);
    const char *patternStart = this->m_patterns + patternOffset;

    const q63_t *samples = (const q63_t*)patternStart;
    q63_t *out = (q63_t*)p;

    if (!out)
    {
        return;
    }

    for (unsigned long k = 0; k < nb; k++)
    {
        out[k] = samples[k];
    }
}
void FPGA::ImportPattern_q31(Testing::PatternID_t id,char* p,Testing::nbSamples_t nb)
{
unsigned long offset,i;
@ -691,6 +714,25 @@ namespace Client
}
}
void FPGA::DumpPattern_q63(Testing::outputID_t id,Testing::nbSamples_t nb, q63_t* data)
{
    /* Print nb q63 samples on stdout, one 16-digit hexadecimal word per
       line. Every line is prefixed with "D: "; the dump starts with the
       output path of `id` and ends with "D: END". Nothing is printed when
       data is a null pointer. */
    std::string fileName = this->getOutputPath(id);
    if (data)
    {
        printf("D: %s\n",fileName.c_str());
        for(Testing::nbSamples_t i=0; i < nb; i++)
        {
            /* %llx requires an unsigned long long argument. uint64_t is not
               guaranteed to be that type (it can be unsigned long), so the
               sample is converted explicitly. */
            printf("D: 0x%016llx\n",(unsigned long long)data[i]);
        }
        printf("D: END\n");
    }
}
void FPGA::DumpPattern_q31(Testing::outputID_t id,Testing::nbSamples_t nb, q31_t* data)
{
std::string fileName = this->getOutputPath(id);

@ -45,6 +45,12 @@ float32_t *loadPattern(Testing::PatternID_t id, Client::PatternMgr *mgr,Testing:
return(mgr->load_f32(id,nb,maxSamples));
}
/* q63_t specialization of loadPattern: forwards to PatternMgr::load_q63,
   which returns the loaded samples and writes the sample count into nb. */
template <>
q63_t *loadPattern(Testing::PatternID_t id, Client::PatternMgr *mgr,Testing::nbSamples_t &nb, Testing::nbSamples_t maxSamples)
{
return(mgr->load_q63(id,nb,maxSamples));
}
template <>
q31_t *loadPattern(Testing::PatternID_t id, Client::PatternMgr *mgr,Testing::nbSamples_t &nb, Testing::nbSamples_t maxSamples)
{
@ -94,6 +100,12 @@ float32_t *localPattern(Testing::PatternID_t id, Client::PatternMgr *mgr)
return(mgr->local_f32(id));
}
/* q63_t specialization of localPattern: forwards to PatternMgr::local_q63.
   NOTE(review): the argument is named `id` and typed Testing::PatternID_t,
   but local_q63 is declared as taking a Testing::nbSamples_t, so this is
   presumably a number of samples rather than a pattern ID - confirm. */
template <>
q63_t *localPattern(Testing::PatternID_t id, Client::PatternMgr *mgr)
{
return(mgr->local_q63(id));
}
template <>
q31_t *localPattern(Testing::PatternID_t id, Client::PatternMgr *mgr)
{
@ -140,6 +152,11 @@ void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t nbSamples,float32_t
mgr->dumpPattern_f32(id,nbSamples,data);
}
/* q63_t overload of dumpPattern: forwards the nbSamples samples in data to
   PatternMgr::dumpPattern_q63 for the output identified by id. */
void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t nbSamples,q63_t* data,PatternMgr *mgr)
{
mgr->dumpPattern_q63(id,nbSamples,data);
}
void dumpPattern(Testing::outputID_t id,Testing::nbSamples_t nbSamples,q31_t* data,PatternMgr *mgr)
{
mgr->dumpPattern_q31(id,nbSamples,data);

@ -46,6 +46,7 @@ TYPE *PatternMgr::local_##EXT(Testing::nbSamples_t nbSamples) \
LOCAL(float64_t,f64)
LOCAL(float32_t,f32)
LOCAL(q63_t,q63)
LOCAL(q31_t,q31)
LOCAL(q15_t,q15)
LOCAL(q7_t,q7)
@ -90,6 +91,24 @@ float32_t *PatternMgr::load_f32(Testing::PatternID_t id,Testing::nbSamples_t& nb
}
q63_t *PatternMgr::load_q63(Testing::PatternID_t id,Testing::nbSamples_t& nbSamples,Testing::nbSamples_t maxSamples)
{
    /* Query the size of pattern `id`, optionally clamp it to maxSamples,
       allocate a buffer through the memory manager and let the IO layer
       fill it. nbSamples receives the number of samples requested.
       Returns whatever NewBuffer returned (the import is skipped when that
       is NULL). */
    nbSamples = m_io->GetPatternSize(id);

    /* MAX_NB_SAMPLES means "no limit requested by the caller" */
    const bool limitRequested = (maxSamples != MAX_NB_SAMPLES);
    if (limitRequested && (maxSamples < nbSamples))
    {
        nbSamples = maxSamples;
    }

    char *buffer = m_mem->NewBuffer(sizeof(q63_t)*nbSamples);
    if (buffer == NULL)
    {
        return((q63_t*)buffer);
    }

    m_io->ImportPattern_q63(id,buffer,nbSamples);
    return((q63_t*)buffer);
}
q31_t *PatternMgr::load_q31(Testing::PatternID_t id,Testing::nbSamples_t& nbSamples,Testing::nbSamples_t maxSamples)
{
nbSamples=m_io->GetPatternSize(id);
@ -203,6 +222,11 @@ void PatternMgr::dumpPattern_f32(Testing::outputID_t id,Testing::nbSamples_t nbS
m_io->DumpPattern_f32(id,nbSamples,data);
}
/* Dump nbSamples q63 samples for output `id` by delegating to the IO
   object's DumpPattern_q63. */
void PatternMgr::dumpPattern_q63(Testing::outputID_t id,Testing::nbSamples_t nbSamples,q63_t* data)
{
m_io->DumpPattern_q63(id,nbSamples,data);
}
void PatternMgr::dumpPattern_q31(Testing::outputID_t id,Testing::nbSamples_t nbSamples,q31_t* data)
{
m_io->DumpPattern_q31(id,nbSamples,data);

@ -663,6 +663,44 @@ namespace Client
}
void Semihosting::ImportPattern_q63(Testing::PatternID_t id,char* p,Testing::nbSamples_t nb)
{
    /* Read q63 samples of pattern `id` from its text file into buffer p.

       File layout as parsed here: one line for the word size format
       (ignored), one line holding the number of samples, then for every
       sample a comment line (ignored) followed by a "0x..." hexadecimal
       word.

       At most nb samples are read unless nb equals MAX_NB_SAMPLES, in
       which case the count from the file is used. The function returns
       without touching p when the file cannot be opened or its header
       cannot be read, and stops early when a sample cannot be parsed. */
    char tmp[256];
    Testing::nbSamples_t len;
    Testing::nbSamples_t i=0;
    uint64_t val;
    q63_t *ptr=(q63_t*)p;

    std::string fileName = this->getPatternPath(id);
    FILE *pattern=fopen(fileName.c_str(), "r");
    if (pattern == NULL)
    {
        /* Passing a null stream to fgets/fscanf/fclose is undefined */
        return;
    }

    // Ignore word size format, then get nb of samples
    if ((fgets(tmp,256,pattern) == NULL) || (fgets(tmp,256,pattern) == NULL))
    {
        fclose(pattern);
        return;
    }
    len=atoi(tmp);

    if ((nb != MAX_NB_SAMPLES) && (nb < len))
    {
        len = nb;
    }

    if (ptr)
    {
        for(i=0;i<len;i++)
        {
            /* %llX stores through an unsigned long long pointer, which is
               not necessarily the same type as uint64_t. */
            unsigned long long raw = 0;

            // Ignore comment
            if (fgets(tmp,256,pattern) == NULL)
            {
                break;
            }
            if (fscanf(pattern,"0x%016llX\n",&raw) != 1)
            {
                break;
            }

            val = (uint64_t)raw;
            *ptr = TOTYP(q63_t,val);
            ptr++;
        }
    }

    fclose(pattern);
}
void Semihosting::ImportPattern_q31(Testing::PatternID_t id,char* p,Testing::nbSamples_t nb)
{
char tmp[256];
@ -928,6 +966,24 @@ namespace Client
fclose(f);
}
}
void Semihosting::DumpPattern_q63(Testing::outputID_t id,Testing::nbSamples_t nb, q63_t* data)
{
    /* Write nb q63 samples to the output file of `id`, one 16-digit
       hexadecimal word per line. Nothing is written when data is a null
       pointer or when the output file cannot be opened. */
    std::string fileName = this->getOutputPath(id);
    if (data)
    {
        FILE *f = fopen(fileName.c_str(),"w");
        if (f == NULL)
        {
            /* Passing a null stream to fprintf/fclose is undefined */
            return;
        }

        for(Testing::nbSamples_t i=0; i < nb; i++)
        {
            /* %llx requires an unsigned long long argument. uint64_t is not
               guaranteed to be that type (it can be unsigned long), so the
               sample is converted explicitly. */
            fprintf(f,"0x%016llx\n",(unsigned long long)data[i]);
        }
        fclose(f);
    }
}
void Semihosting::DumpPattern_q31(Testing::outputID_t id,Testing::nbSamples_t nb, q31_t* data)
{
std::string fileName = this->getOutputPath(id);

@ -12,10 +12,15 @@ class BasicMathsBenchmarksF32:public Client::Suite
Client::Pattern<float32_t> input2;
Client::LocalPattern<float32_t> output;
Client::RefPattern<float32_t> ref;
int nb;
float32_t *inp1;
float32_t *inp2;
float32_t *outp;
float32_t *refp;
};

@ -0,0 +1,25 @@
#include "Test.h"
#include "Pattern.h"
/**
 * Test suite declaration for the q15 basic maths tests.
 *
 * Derives from Client::Suite and declares the setUp / tearDown hooks
 * together with the patterns shared by the tests of the suite.
 */
class BasicTestsQ15:public Client::Suite
{
public:
// Build the suite for the given test ID.
BasicTestsQ15(Testing::testID_t id);
// Called with the test ID, its parameters and the pattern manager.
virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr);
// Called with the test ID and the pattern manager.
virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
private:
// NOTE(review): presumably declares the individual test methods - confirm
// against the content of this header.
#include "BasicTestsQ15_decl.h"
// Two q15 input patterns
Client::Pattern<q15_t> input1;
Client::Pattern<q15_t> input2;
// q15 output pattern
Client::LocalPattern<q15_t> output;
// q63 output pattern.
// NOTE(review): name suggests it holds the dot product result - confirm.
Client::LocalPattern<q63_t> dotOutput;
// Reference patterns are not loaded when we are in dump mode
Client::RefPattern<q15_t> ref;
// q63 reference pattern matching dotOutput
Client::RefPattern<q63_t> dotRef;
/* Offset or scale value */
q15_t scalar;
};

@ -0,0 +1,25 @@
#include "Test.h"
#include "Pattern.h"
/**
 * Test suite declaration for the q31 basic maths tests.
 *
 * Derives from Client::Suite and declares the setUp / tearDown hooks
 * together with the patterns shared by the tests of the suite.
 */
class BasicTestsQ31:public Client::Suite
{
public:
// Build the suite for the given test ID.
BasicTestsQ31(Testing::testID_t id);
// Called with the test ID, its parameters and the pattern manager.
virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr);
// Called with the test ID and the pattern manager.
virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
private:
// NOTE(review): presumably declares the individual test methods - confirm
// against the content of this header.
#include "BasicTestsQ31_decl.h"
// Two q31 input patterns
Client::Pattern<q31_t> input1;
Client::Pattern<q31_t> input2;
// q31 output pattern
Client::LocalPattern<q31_t> output;
// q63 output pattern.
// NOTE(review): name suggests it holds the dot product result - confirm.
Client::LocalPattern<q63_t> dotOutput;
// Reference patterns are not loaded when we are in dump mode
Client::RefPattern<q31_t> ref;
// q63 reference pattern matching dotOutput
Client::RefPattern<q63_t> dotRef;
/* Offset or scale value */
q31_t scalar;
};

@ -0,0 +1,25 @@
#include "Test.h"
#include "Pattern.h"
/**
 * Test suite declaration for the q7 basic maths tests.
 *
 * Derives from Client::Suite and declares the setUp / tearDown hooks
 * together with the patterns shared by the tests of the suite.
 */
class BasicTestsQ7:public Client::Suite
{
public:
// Build the suite for the given test ID.
BasicTestsQ7(Testing::testID_t id);
// Called with the test ID, its parameters and the pattern manager.
virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& params,Client::PatternMgr *mgr);
// Called with the test ID and the pattern manager.
virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
private:
// NOTE(review): presumably declares the individual test methods - confirm
// against the content of this header.
#include "BasicTestsQ7_decl.h"
// Two q7 input patterns
Client::Pattern<q7_t> input1;
Client::Pattern<q7_t> input2;
// q7 output pattern
Client::LocalPattern<q7_t> output;
// q31 output pattern (unlike the q15 and q31 suites, which use q63 here).
// NOTE(review): name suggests it holds the dot product result - confirm.
Client::LocalPattern<q31_t> dotOutput;
// Reference patterns are not loaded when we are in dump mode
Client::RefPattern<q7_t> ref;
// q31 reference pattern matching dotOutput
Client::RefPattern<q31_t> dotRef;
/* Offset or scale value */
q7_t scalar;
};

@ -7,7 +7,7 @@ import Tools
# Those patterns are used for tests and benchmarks.
# For tests, there is the need to add tests for saturation
def writeTests(config):
def writeTests(config,format):
NBSAMPLES=256
data1=np.random.randn(NBSAMPLES)
@ -39,16 +39,31 @@ def writeTests(config):
config.writeReference(6, ref)
nb = 3
ref = np.array([np.dot(data1[0:nb] ,data2[0:nb])])
config.writeReference(7, ref)
ref = np.array([np.dot(data1[0:nb] ,data2[0:nb])]) / 2**15
if format == 31 or format == 15:
config.writeReferenceQ63(7, ref)
elif format == 7:
config.writeReferenceQ31(7, ref)
else:
config.writeReference(7, ref)
nb = 8
ref = np.array([np.dot(data1[0:nb] ,data2[0:nb])])
config.writeReference(8, ref)
ref = np.array([np.dot(data1[0:nb] ,data2[0:nb])]) / 2**15
if format == 31 or format == 15:
config.writeReferenceQ63(8, ref)
elif format == 7:
config.writeReferenceQ31(8, ref)
else:
config.writeReference(8, ref)
nb = 9
ref = np.array([np.dot(data1[0:nb] ,data2[0:nb])])
config.writeReference(9, ref)
ref = np.array([np.dot(data1[0:nb] ,data2[0:nb])]) / 2**15
if format == 31 or format == 15:
config.writeReferenceQ63(9, ref)
elif format == 7:
config.writeReferenceQ31(9, ref)
else:
config.writeReference(9, ref)
ref = abs(data1)
config.writeReference(10, ref)
@ -56,6 +71,88 @@ def writeTests(config):
ref = np.array([np.dot(data1 ,data2)])
config.writeReference(11, ref)
return(11)
def writeTestsWithSat(config,format):
    """ Write the common test patterns followed by the saturation patterns

    The common patterns are generated with writeTests. Additional inputs
    (ID 12) and references (IDs following the last one returned by
    writeTests) are then generated with values at, or next to, the limits
    of the fixed point format.

    Args:
      config (Config): Configuration used to write the pattern files
      format (int): Number of fractional bits. 31, 15 or 7
    Raises:
      ValueError: when format is not 31, 15 or 7
    Returns:
      Nothing
    """
    # Number of samples for each supported format.
    # NOTE(review): presumably two full 128-bit vectors plus one tail
    # sample for each datatype - confirm.
    nbSamplesForFormat = {31: 9, 15: 17, 7: 33}
    if format not in nbSamplesForFormat:
        # Fail before any file is written rather than later on an
        # undefined NBSAMPLES.
        raise ValueError("Unsupported fixed point format: %s" % format)
    NBSAMPLES = nbSamplesForFormat[format]

    nb = writeTests(config,format)

    # Largest positive integer alternating with 2
    data1 = np.full(NBSAMPLES, 2**format - 1)
    data1[1::2] = 2
    # Most negative integer alternating with -2
    data2 = np.full(NBSAMPLES, -2**format)
    data2[1::2] = -2

    datar=np.random.randn(NBSAMPLES)
    # Normalize with the largest magnitude (and not the largest signed
    # value) so that negative samples are also bounded by 1.
    datar = datar / np.max(np.abs(datar))
    datar = datar / 3.0 # Because used to test shift of 2 without saturation

    config.writeInput(12, datar)

    # The extreme values are written as raw integers of the right width
    if format == 31:
        config.writeInputS32(12,data1-1,"MaxPosInput")
        config.writeInputS32(12,data2+1,"MaxNegInput")
        config.writeInputS32(12,data2,"MaxNeg2Input")

    if format == 15:
        config.writeInputS16(12,data1-1,"MaxPosInput")
        config.writeInputS16(12,data2+1,"MaxNegInput")
        config.writeInputS16(12,data2,"MaxNeg2Input")

    if format == 7:
        config.writeInputS8(12,data1-1,"MaxPosInput")
        config.writeInputS8(12,data2+1,"MaxNegInput")
        config.writeInputS8(12,data2,"MaxNeg2Input")

    # Same values expressed as fractions to compute the references
    d1 = 1.0*(data1-1) / 2**format
    d2 = 1.0*(data2+1) / 2**format
    d3 = 1.0*(data2) / 2**format

    # Sum of the input with itself
    ref = d1 + d1
    config.writeReference(nb+1, ref,"PosSat")
    ref = d2 + d2
    config.writeReference(nb+2, ref,"NegSat")

    # Difference of the positive and negative inputs
    ref = d1 - d2
    config.writeReference(nb+3, ref,"PosSat")
    ref = d2 - d1
    config.writeReference(nb+4, ref,"NegSat")

    # Product of the most negative input with itself
    ref = d3*d3
    config.writeReference(nb+5, ref,"PosSat")

    # Opposite of the most negative input
    ref = -d3
    config.writeReference(nb+6, ref,"PosSat")

    # Addition of a constant
    ref = d1 + 0.9
    config.writeReference(nb+7, ref,"PosSat")
    ref = d2 - 0.9
    config.writeReference(nb+8, ref,"NegSat")

    # Product with the first sample
    ref = d3 * d3[0]
    config.writeReference(nb+9, ref,"PosSat")

    # Multiplication by 2
    ref = datar * 2.0
    config.writeReference(nb+10, ref,"Shift")
    ref = d1 * 2.0
    config.writeReference(nb+11, ref,"Shift")
    ref = d2 * 2.0
    config.writeReference(nb+12, ref,"Shift")
PATTERNDIR = os.path.join("Patterns","DSP","BasicMaths","BasicMaths")
PARAMDIR = os.path.join("Parameters","DSP","BasicMaths","BasicMaths")
@ -67,10 +164,10 @@ configq7=Tools.Config(PATTERNDIR,PARAMDIR,"q7")
writeTests(configf32)
writeTests(configq31)
writeTests(configq15)
writeTests(configq7)
#writeTests(configf32,0)
writeTestsWithSat(configq31,31)
writeTestsWithSat(configq15,15)
writeTestsWithSat(configq7,7)
# Params just as example
someLists=[[1,3,5],[1,3,5],[1,3,5]]

@ -52,6 +52,14 @@ def float64_to_hex(f):
"""
return hex(struct.unpack('<Q', struct.pack('<d', f))[0])
def to_q63(v):
    """ Convert a value to the hexadecimal string of its q63 encoding

    The value is scaled by 2**63, rounded, saturated to the signed 64 bit
    range and formatted as "0x" followed by 16 upper case hexadecimal
    digits (two's complement for negative values).

    Args:
      v (float): Value to convert
    Raises:
      Nothing
    Returns:
      str : hexadecimal representation of the q63 value
    """
    Q63_MAX = (1 << 63) - 1
    Q63_MIN = -(1 << 63)
    scaled = int(round(v * 2**63))
    saturated = min(max(scaled, Q63_MIN), Q63_MAX)
    # Masking to 64 bits gives the two's complement of negative values
    return ("0x%016X" % (saturated & 0xFFFFFFFFFFFFFFFF))
def to_q31(v):
r = int(round(v * 2**31))
if (r > 0x07FFFFFFF):
@ -113,6 +121,21 @@ class Config:
else:
return(os.path.join(self._patternDir,"Input%d_%s.txt" % (i,self._ext)))
def inputS32P(self,i,name=None):
    """ Path to an "s32" input pattern from the ID

    Args:
      i (int): ID of the input pattern
      name (str): Optional prefix of the file name. "Input" when not given
    Raises:
      Nothing
    Returns:
      str : path to the file where to generate the pattern data
    """
    prefix = name if name else "Input"
    fileName = "%s%d_%s.txt" % (prefix,i,"s32")
    return(os.path.join(self._patternDir,fileName))
def inputS16P(self,i,name=None):
""" Path to a reference pattern from the ID
@ -128,6 +151,21 @@ class Config:
else:
return(os.path.join(self._patternDir,"Input%d_%s.txt" % (i,"s16")))
def inputS8P(self,i,name=None):
    """ Path to an "s8" input pattern from the ID

    Args:
      i (int): ID of the input pattern
      name (str): Optional prefix of the file name. "Input" when not given
    Raises:
      Nothing
    Returns:
      str : path to the file where to generate the pattern data
    """
    prefix = name if name else "Input"
    fileName = "%s%d_%s.txt" % (prefix,i,"s8")
    return(os.path.join(self._patternDir,fileName))
def inputQ31P(self,i,name=None):
""" Path to a reference pattern from the ID
@ -248,6 +286,36 @@ class Config:
else:
return(os.path.join(self._patternDir,"Reference%d_%s.txt" % (i,"s32")))
def refQ63P(self,i,name=None):
    """ Path to a "q63" reference pattern from the ID

    Args:
      i (int): ID of the reference pattern
      name (str): Optional prefix of the file name. "Reference" when not given
    Raises:
      Nothing
    Returns:
      str : path to the file where to generate the pattern data
    """
    prefix = name if name else "Reference"
    fileName = "%s%d_%s.txt" % (prefix,i,"q63")
    return(os.path.join(self._patternDir,fileName))
def refQ31P(self,i,name=None):
    """ Path to a "q31" reference pattern from the ID

    Args:
      i (int): ID of the reference pattern
      name (str): Optional prefix of the file name. "Reference" when not given
    Raises:
      Nothing
    Returns:
      str : path to the file where to generate the pattern data
    """
    prefix = name if name else "Reference"
    fileName = "%s%d_%s.txt" % (prefix,i,"q31")
    return(os.path.join(self._patternDir,fileName))
def refF32P(self,i,name=None):
""" Path to a reference pattern from the ID
@ -328,6 +396,31 @@ class Config:
f.write("// %f\n" % v)
f.write("%s\n" % float_to_hex(v))
def _writeVectorQ63(self,i,data):
    """ Write pattern data as q63 values

    The format is recognized by the test framework script.
    First line is the sample width ("D" for this format)
    Second line is number of samples
    Then, for each sample, a comment line with the value followed by
    a line with the hexadecimal representation returned by to_q63.

    Args:
      i (str): Path of the pattern file to write
      data (array): Vector containing the data
    Raises:
      Nothing
    Returns:
      Nothing
    """
    with open(i,"w") as out:
        # Write sample dimension nb sample header
        header = "D\n%d\n" % len(data)
        out.write(header)
        for sample in data:
            comment = "// %f\n" % sample
            out.write(comment)
            encoded = "%s\n" % to_q63(sample)
            out.write(encoded)
def _writeVectorQ31(self,i,data):
""" Write pattern data
@ -508,6 +601,8 @@ class Config:
self._writeVectorF64(self.refP(j,name),data)
if (self._ext == "f32"):
self._writeVectorF32(self.refP(j,name),data)
if (self._ext == "q63"):
self._writeVectorQ63(self.refP(j,name),data)
if (self._ext == "q31"):
self._writeVectorQ31(self.refP(j,name),data)
if (self._ext == "q15"):
@ -519,6 +614,12 @@ class Config:
if (self._ext == "s8"):
self._writeVectorS8(self.refP(j,name),data)
def writeReferenceQ63(self,j,data,name=None):
    """ Write data to the q63 reference pattern file of ID j

    The path comes from refQ63P and the content from _writeVectorQ63.
    """
    path = self.refQ63P(j,name)
    self._writeVectorQ63(path,data)
def writeReferenceQ31(self,j,data,name=None):
    """ Write data to the q31 reference pattern file of ID j

    The path comes from refQ31P and the content from _writeVectorQ31.
    """
    path = self.refQ31P(j,name)
    self._writeVectorQ31(path,data)
def writeReferenceS8(self,j,data,name=None):
    """ Write data to the reference pattern file given by refS8P for ID j

    The content is written by _writeVectorS8.
    """
    path = self.refS8P(j,name)
    self._writeVectorS8(path,data)
@ -556,9 +657,15 @@ class Config:
def writeInputQ7(self,j,data,name=None):
    """ Write data to the input pattern file given by inputQ7P for ID j

    The content is written by _writeVectorQ7.
    """
    path = self.inputQ7P(j,name)
    self._writeVectorQ7(path,data)
def writeInputS32(self,j,data,name=None):
    """ Write data to the "s32" input pattern file of ID j

    The path comes from inputS32P and the content from _writeVectorS32.
    """
    path = self.inputS32P(j,name)
    self._writeVectorS32(path,data)
def writeInputS16(self,j,data,name=None):
    """ Write data to the input pattern file given by inputS16P for ID j

    The content is written by _writeVectorS16.
    """
    path = self.inputS16P(j,name)
    self._writeVectorS16(path,data)
def writeInputS8(self,j,data,name=None):
    """ Write data to the "s8" input pattern file of ID j

    The path comes from inputS8P and the content from _writeVectorS8.
    """
    path = self.inputS8P(j,name)
    self._writeVectorS8(path,data)
def writeInputU32(self,j,data,name=None):
    """ Write data to the input pattern file given by inputU32P for ID j

    The content is written by _writeVectorU32.
    """
    path = self.inputU32P(j,name)
    self._writeVectorU32(path,data)

@ -0,0 +1,36 @@
H
17
// -0.127026
0xEFBE
// 0.135563
0x115A
// -0.055957
0xF8D6
// 0.005012
0x00A4
// 0.049539
0x0657
// 0.143211
0x1255
// 0.041455
0x054E
// -0.054525
0xF905
// 0.016068
0x020F
// -0.120403
0xF097
// 0.097939
0x0C89
// -0.110690
0xF1D5
// 0.333333
0x2AAB
// 0.004649
0x0098
// 0.090070
0x0B87
// 0.027590
0x0388
// 0.058612
0x0781

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,36 @@
H
17
// -32768
0x8000
// -2
0xFFFE
// -32768
0x8000
// -2
0xFFFE
// -32768
0x8000
// -2
0xFFFE
// -32768
0x8000
// -2
0xFFFE
// -32768
0x8000
// -2
0xFFFE
// -32768
0x8000
// -2
0xFFFE
// -32768
0x8000
// -2
0xFFFE
// -32768
0x8000
// -2
0xFFFE
// -32768
0x8000

@ -0,0 +1,36 @@
H
17
// -32767
0x8001
// -1
0xFFFF
// -32767
0x8001
// -1
0xFFFF
// -32767
0x8001
// -1
0xFFFF
// -32767
0x8001
// -1
0xFFFF
// -32767
0x8001
// -1
0xFFFF
// -32767
0x8001
// -1
0xFFFF
// -32767
0x8001
// -1
0xFFFF
// -32767
0x8001
// -1
0xFFFF
// -32767
0x8001

@ -0,0 +1,36 @@
H
17
// 32766
0x7FFE
// 1
0x0001
// 32766
0x7FFE
// 1
0x0001
// 32766
0x7FFE
// 1
0x0001
// 32766
0x7FFE
// 1
0x0001
// 32766
0x7FFE
// 1
0x0001
// 32766
0x7FFE
// 1
0x0001
// 32766
0x7FFE
// 1
0x0001
// 32766
0x7FFE
// 1
0x0001
// 32766
0x7FFE

@ -0,0 +1,36 @@
H
17
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000

@ -0,0 +1,36 @@
H
17
// -1.999908
0x8000
// -0.000061
0xFFFE
// -1.999908
0x8000
// -0.000061
0xFFFE
// -1.999908
0x8000
// -0.000061
0xFFFE
// -1.999908
0x8000
// -0.000061
0xFFFE
// -1.999908
0x8000
// -0.000061
0xFFFE
// -1.999908
0x8000
// -0.000061
0xFFFE
// -1.999908
0x8000
// -0.000061
0xFFFE
// -1.999908
0x8000
// -0.000061
0xFFFE
// -1.999908
0x8000

@ -0,0 +1,36 @@
H
17
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000

@ -0,0 +1,36 @@
H
17
// -1.899969
0x8000
// -0.900031
0x8CCC
// -1.899969
0x8000
// -0.900031
0x8CCC
// -1.899969
0x8000
// -0.900031
0x8CCC
// -1.899969
0x8000
// -0.900031
0x8CCC
// -1.899969
0x8000
// -0.900031
0x8CCC
// -1.899969
0x8000
// -0.900031
0x8CCC
// -1.899969
0x8000
// -0.900031
0x8CCC
// -1.899969
0x8000
// -0.900031
0x8CCC
// -1.899969
0x8000

@ -0,0 +1,36 @@
H
17
// -1.900000
0x8000
// -0.900061
0x8CCB
// -1.900000
0x8000
// -0.900061
0x8CCB
// -1.900000
0x8000
// -0.900061
0x8CCB
// -1.900000
0x8000
// -0.900061
0x8CCB
// -1.900000
0x8000
// -0.900061
0x8CCB
// -1.900000
0x8000
// -0.900061
0x8CCB
// -1.900000
0x8000
// -0.900061
0x8CCB
// -1.900000
0x8000
// -0.900061
0x8CCB
// -1.900000
0x8000

@ -0,0 +1,36 @@
H
17
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF

@ -0,0 +1,36 @@
H
17
// 1.999908
0x7FFF
// 0.000061
0x0002
// 1.999908
0x7FFF
// 0.000061
0x0002
// 1.999908
0x7FFF
// 0.000061
0x0002
// 1.999908
0x7FFF
// 0.000061
0x0002
// 1.999908
0x7FFF
// 0.000061
0x0002
// 1.999908
0x7FFF
// 0.000061
0x0002
// 1.999908
0x7FFF
// 0.000061
0x0002
// 1.999908
0x7FFF
// 0.000061
0x0002
// 1.999908
0x7FFF

@ -0,0 +1,36 @@
H
17
// 1.000000
0x7FFF
// 0.000000
0x0000
// 1.000000
0x7FFF
// 0.000000
0x0000
// 1.000000
0x7FFF
// 0.000000
0x0000
// 1.000000
0x7FFF
// 0.000000
0x0000
// 1.000000
0x7FFF
// 0.000000
0x0000
// 1.000000
0x7FFF
// 0.000000
0x0000
// 1.000000
0x7FFF
// 0.000000
0x0000
// 1.000000
0x7FFF
// 0.000000
0x0000
// 1.000000
0x7FFF

@ -0,0 +1,36 @@
H
17
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF

@ -0,0 +1,36 @@
H
17
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF

@ -0,0 +1,36 @@
H
17
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF
// 0.900031
0x7334
// 1.899939
0x7FFF

@ -0,0 +1,36 @@
H
17
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF
// 0.000061
0x0002
// 1.000000
0x7FFF

@ -1,4 +1,4 @@
H
1
// 0.049476
0x0655
// 0.000003
0x0000

@ -1,4 +1,4 @@
H
1
// 0.273481
0x2301
// 0.000008
0x0000

@ -1,4 +1,4 @@
H
1
// 0.308351
0x2778
// 0.000016
0x0001

@ -0,0 +1,36 @@
H
17
// -0.254051
0xDF7B
// 0.271125
0x22B4
// -0.111913
0xF1AD
// 0.010024
0x0148
// 0.099078
0x0CAF
// 0.286423
0x24AA
// 0.082909
0x0A9D
// -0.109049
0xF20B
// 0.032135
0x041D
// -0.240805
0xE12D
// 0.195877
0x1913
// -0.221380
0xE3AA
// 0.666667
0x5555
// 0.009299
0x0131
// 0.180140
0x170F
// 0.055180
0x0710
// 0.117224
0x0F01

@ -0,0 +1,36 @@
H
17
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF
// 0.000061
0x0002
// 1.999878
0x7FFF

@ -0,0 +1,36 @@
H
17
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000
// -0.000061
0xFFFE
// -1.999939
0x8000

@ -0,0 +1,20 @@
W
9
// -0.085318
0xF5144ACE
// -0.193783
0xE7321E29
// -0.014971
0xFE156F8A
// 0.209494
0x1AD0AF9C
// -0.112886
0xF18CF0A8
// 0.333333
0x2AAAAAAB
// 0.221288
0x1C5327BE
// 0.021019
0x02B0BCF5
// 0.158600
0x144D0407

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,20 @@
W
9
// -2147483648
0x80000000
// -2
0xFFFFFFFE
// -2147483648
0x80000000
// -2
0xFFFFFFFE
// -2147483648
0x80000000
// -2
0xFFFFFFFE
// -2147483648
0x80000000
// -2
0xFFFFFFFE
// -2147483648
0x80000000

@ -0,0 +1,20 @@
W
9
// -2147483647
0x80000001
// -1
0xFFFFFFFF
// -2147483647
0x80000001
// -1
0xFFFFFFFF
// -2147483647
0x80000001
// -1
0xFFFFFFFF
// -2147483647
0x80000001
// -1
0xFFFFFFFF
// -2147483647
0x80000001

@ -0,0 +1,20 @@
W
9
// 2147483646
0x7FFFFFFE
// 1
0x00000001
// 2147483646
0x7FFFFFFE
// 1
0x00000001
// 2147483646
0x7FFFFFFE
// 1
0x00000001
// 2147483646
0x7FFFFFFE
// 1
0x00000001
// 2147483646
0x7FFFFFFE

@ -0,0 +1,20 @@
W
9
// -2.000000
0x80000000
// -0.000000
0xFFFFFFFE
// -2.000000
0x80000000
// -0.000000
0xFFFFFFFE
// -2.000000
0x80000000
// -0.000000
0xFFFFFFFE
// -2.000000
0x80000000
// -0.000000
0xFFFFFFFE
// -2.000000
0x80000000

@ -0,0 +1,20 @@
W
9
// -2.000000
0x80000000
// -0.000000
0xFFFFFFFE
// -2.000000
0x80000000
// -0.000000
0xFFFFFFFE
// -2.000000
0x80000000
// -0.000000
0xFFFFFFFE
// -2.000000
0x80000000
// -0.000000
0xFFFFFFFE
// -2.000000
0x80000000

@ -0,0 +1,20 @@
W
9
// -1.900000
0x80000000
// -0.900000
0x8CCCCCCC
// -1.900000
0x80000000
// -0.900000
0x8CCCCCCC
// -1.900000
0x80000000
// -0.900000
0x8CCCCCCC
// -1.900000
0x80000000
// -0.900000
0x8CCCCCCC
// -1.900000
0x80000000

@ -0,0 +1,20 @@
W
9
// 2.000000
0x7FFFFFFF
// 0.000000
0x00000002
// 2.000000
0x7FFFFFFF
// 0.000000
0x00000002
// 2.000000
0x7FFFFFFF
// 0.000000
0x00000002
// 2.000000
0x7FFFFFFF
// 0.000000
0x00000002
// 2.000000
0x7FFFFFFF

@ -0,0 +1,20 @@
W
9
// 2.000000
0x7FFFFFFF
// 0.000000
0x00000002
// 2.000000
0x7FFFFFFF
// 0.000000
0x00000002
// 2.000000
0x7FFFFFFF
// 0.000000
0x00000002
// 2.000000
0x7FFFFFFF
// 0.000000
0x00000002
// 2.000000
0x7FFFFFFF

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save