From da2a055e21c50590858ff7396f5a7c648b2d9efa Mon Sep 17 00:00:00 2001 From: Christophe Favergeon Date: Thu, 28 Jul 2022 08:14:36 +0200 Subject: [PATCH] Improved speed of atan2 for q15 and q31 versions. Inlined some CMSIS-DSP functions (since they were used on vectors of only 1 sample). --- Source/FastMathFunctions/arm_atan2_q15.c | 44 +++++++++++++++++++++--- Source/FastMathFunctions/arm_atan2_q31.c | 44 +++++++++++++++++++++--- 2 files changed, 78 insertions(+), 10 deletions(-) diff --git a/Source/FastMathFunctions/arm_atan2_q15.c b/Source/FastMathFunctions/arm_atan2_q15.c index ddf98d8c..513d2a76 100755 --- a/Source/FastMathFunctions/arm_atan2_q15.c +++ b/Source/FastMathFunctions/arm_atan2_q15.c @@ -79,14 +79,26 @@ __STATIC_FORCEINLINE q15_t arm_atan_q15(q15_t y,q15_t x) if (y<0) { - arm_negate_q15(&y,&y,1); + /* Negate y */ +#if defined (ARM_MATH_DSP) + y = __QSUB16(0, y); +#else + y = (y == (q15_t) 0x8000) ? (q15_t) 0x7fff : -y; +#endif + sign=1-sign; } if (x < 0) { sign=1 - sign; - arm_negate_q15(&x,&x,1); + + /* Negate x */ +#if defined (ARM_MATH_DSP) + x = __QSUB16(0, x); +#else + x = (x == (q15_t) 0x8000) ? (q15_t) 0x7fff : -x; +#endif } if (y > x) @@ -96,7 +108,15 @@ __STATIC_FORCEINLINE q15_t arm_atan_q15(q15_t y,q15_t x) arm_divide_q15(x,y,&ratio,&shift); - arm_shift_q15(&ratio,shift,&ratio,1); + /* Shift ratio by shift */ + if (shift >=0) + { + ratio = __SSAT(((q31_t) ratio << shift), 16); + } + else + { + ratio = (ratio >> -shift); + } res = PIHALFQ13 - arm_atan_limited_q15(ratio); @@ -108,7 +128,16 @@ __STATIC_FORCEINLINE q15_t arm_atan_q15(q15_t y,q15_t x) arm_divide_q15(y,x,&ratio,&shift); - arm_shift_q15(&ratio,shift,&ratio,1); + /* Shift ratio by shift */ + if (shift >=0) + { + ratio = __SSAT(((q31_t) ratio << shift), 16); + } + else + { + ratio = (ratio >> -shift); + } + res = arm_atan_limited_q15(ratio); @@ -117,7 +146,12 @@ __STATIC_FORCEINLINE q15_t arm_atan_q15(q15_t y,q15_t x) if (sign) { - arm_negate_q15(&res,&res,1); + /* Negate res */ +#if defined (ARM_MATH_DSP) + res = __QSUB16(0, res); +#else + res = (res == (q15_t) 0x8000) ? (q15_t) 0x7fff : -res; +#endif } return(res); diff --git a/Source/FastMathFunctions/arm_atan2_q31.c b/Source/FastMathFunctions/arm_atan2_q31.c index 1c83e436..addbe973 100755 --- a/Source/FastMathFunctions/arm_atan2_q31.c +++ b/Source/FastMathFunctions/arm_atan2_q31.c @@ -80,14 +80,26 @@ __STATIC_FORCEINLINE q31_t arm_atan_q31(q31_t y,q31_t x) if (y<0) { - arm_negate_q31(&y,&y,1); + /* Negate y */ +#if defined (ARM_MATH_DSP) + y = __QSUB(0, y); +#else + y = (y == INT32_MIN) ? INT32_MAX : -y; +#endif + sign=1-sign; } if (x < 0) { sign=1 - sign; - arm_negate_q31(&x,&x,1); + + /* Negate x */ +#if defined (ARM_MATH_DSP) + x = __QSUB(0, x); +#else + x = (x == INT32_MIN) ? INT32_MAX : -x; +#endif } if (y > x) @@ -97,7 +109,15 @@ __STATIC_FORCEINLINE q31_t arm_atan_q31(q31_t y,q31_t x) arm_divide_q31(x,y,&ratio,&shift); - arm_shift_q31(&ratio,shift,&ratio,1); + /* Shift ratio by shift */ + if (shift >= 0) + { + ratio = clip_q63_to_q31((q63_t) ratio << shift); + } + else + { + ratio = (ratio >> -shift); + } res = PIHALF_Q29 - arm_atan_limited_q31(ratio); @@ -109,7 +129,16 @@ __STATIC_FORCEINLINE q31_t arm_atan_q31(q31_t y,q31_t x) arm_divide_q31(y,x,&ratio,&shift); - arm_shift_q31(&ratio,shift,&ratio,1); + /* Shift ratio by shift */ + if (shift >= 0) + { + ratio = clip_q63_to_q31((q63_t) ratio << shift); + } + else + { + ratio = (ratio >> -shift); + } + res = arm_atan_limited_q31(ratio); @@ -118,7 +147,12 @@ __STATIC_FORCEINLINE q31_t arm_atan_q31(q31_t y,q31_t x) if (sign) { - arm_negate_q31(&res,&res,1); + /* Negate res */ +#if defined (ARM_MATH_DSP) + res = __QSUB(0, res); +#else + res = (res == INT32_MIN) ? INT32_MAX : -res; +#endif } return(res);