CMSIS-DSP: Corrected compilation warnings

More compilation tests done with AC5
pull/19/head
Christophe Favergeon 6 years ago
parent 8526741f9e
commit dc0be10d4e

@@ -1,371 +1,371 @@
/******************************************************************************
 * @file     arm_vec_math.h
 * @brief    Public header file for CMSIS DSP Library
 * @version  V1.7.0
 * @date     15. October 2019
 ******************************************************************************/
/*
 * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef _ARM_VEC_MATH_H
#define _ARM_VEC_MATH_H

#include "arm_math.h"
#include "arm_common_tables.h"
#include "arm_helium_utils.h"

#ifdef   __cplusplus
extern "C"
{
#endif
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)

#define INV_NEWTON_INIT_F32         0x7EF127EA

static const float32_t __logf_rng_f32 = 0.693147180f;
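/* Note: INV_NEWTON_INIT_F32 (0x7EF127EA) appears to be the classic bit-level
 * seed for fast reciprocal computation, used by vrecip_f32() below: subtracting
 * the operand's bit pattern from this constant yields a rough approximation of
 * 1/x when reinterpreted as a float. __logf_rng_f32 is ln(2), used to fold the
 * extracted binary exponent back into the natural-log result in vlogq_f32(). */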
/* fast inverse approximation (3x newton) */
__STATIC_INLINE f32x4_t vrecip_medprec_f32(
    f32x4_t x)
{
    q31x4_t         m;
    f32x4_t         b;
    any32x4_t       xinv;
    f32x4_t         ax = vabsq(x);

    xinv.f = ax;
    m = 0x3F800000 - (xinv.i & 0x7F800000);
    xinv.i = xinv.i + m;
    xinv.f = 1.41176471f - 0.47058824f * xinv.f;
    xinv.i = xinv.i + m;

    b = 2.0f - xinv.f * ax;
    xinv.f = xinv.f * b;

    b = 2.0f - xinv.f * ax;
    xinv.f = xinv.f * b;

    b = 2.0f - xinv.f * ax;
    xinv.f = xinv.f * b;

    xinv.f = vdupq_m(xinv.f, INFINITY, vcmpeqq(x, 0.0f));
    /*
     * restore sign
     */
    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));

    return xinv.f;
}
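/* Note: each "b = 2.0f - xinv.f * ax; xinv.f = xinv.f * b;" pair above is one
 * Newton-Raphson step for the reciprocal, x_{n+1} = x_n * (2 - a * x_n), which
 * roughly doubles the number of correct bits per step. The seed constants
 * 1.41176471f and 0.47058824f appear to be 24/17 and 8/17, a linear first
 * guess taken on the exponent-normalized input. */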
/* fast inverse approximation (4x newton) */
__STATIC_INLINE f32x4_t vrecip_hiprec_f32(
    f32x4_t x)
{
    q31x4_t         m;
    f32x4_t         b;
    any32x4_t       xinv;
    f32x4_t         ax = vabsq(x);

    xinv.f = ax;
    m = 0x3F800000 - (xinv.i & 0x7F800000);
    xinv.i = xinv.i + m;
    xinv.f = 1.41176471f - 0.47058824f * xinv.f;
    xinv.i = xinv.i + m;

    b = 2.0f - xinv.f * ax;
    xinv.f = xinv.f * b;

    b = 2.0f - xinv.f * ax;
    xinv.f = xinv.f * b;

    b = 2.0f - xinv.f * ax;
    xinv.f = xinv.f * b;

    b = 2.0f - xinv.f * ax;
    xinv.f = xinv.f * b;

    xinv.f = vdupq_m(xinv.f, INFINITY, vcmpeqq(x, 0.0f));
    /*
     * restore sign
     */
    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));

    return xinv.f;
}
__STATIC_INLINE f32x4_t vdiv_f32(
    f32x4_t num, f32x4_t den)
{
    return vmulq(num, vrecip_hiprec_f32(den));
}
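/* Note: division is implemented as num * (1/den) with the high-precision
 * reciprocal above, so results are approximate rather than correctly
 * rounded. */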
/**
  @brief         Single-precision Taylor series evaluation.
  @param[in]     x       f32 quad vector input
  @param[in]     coeffs  f32 quad vector coeffs
  @return        destination f32 quad vector
 */
__STATIC_INLINE f32x4_t vtaylor_polyq_f32(
    f32x4_t x,
    const float32_t * coeffs)
{
    f32x4_t         A = vfmasq(vdupq_n_f32(coeffs[4]), x, coeffs[0]);
    f32x4_t         B = vfmasq(vdupq_n_f32(coeffs[6]), x, coeffs[2]);
    f32x4_t         C = vfmasq(vdupq_n_f32(coeffs[5]), x, coeffs[1]);
    f32x4_t         D = vfmasq(vdupq_n_f32(coeffs[7]), x, coeffs[3]);
    f32x4_t         x2 = vmulq(x, x);
    f32x4_t         x4 = vmulq(x2, x2);
    f32x4_t         res = vfmaq(vfmaq_f32(A, B, x2), vfmaq_f32(C, D, x2), x4);

    return res;
}
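/* Note: this is an Estrin-style evaluation, res = (A + B*x2) + (C + D*x2)*x4,
 * which reads as the degree-7 polynomial
 *   coeffs[0] + coeffs[4]*x   + coeffs[2]*x^2 + coeffs[6]*x^3
 * + coeffs[1]*x^4 + coeffs[5]*x^5 + coeffs[3]*x^6 + coeffs[7]*x^7.
 * The short FMA dependency chains expose more instruction-level parallelism
 * than a plain Horner scheme would. */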
__STATIC_INLINE f32x4_t vmant_exp_f32(
    f32x4_t x,
    int32x4_t * e)
{
    any32x4_t       r;
    int32x4_t       n;

    r.f = x;
    n = r.i >> 23;
    n = n - 127;
    r.i = r.i - (n << 23);

    *e = n;
    return r.f;
}
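/* Note: frexp-style split, x = m * 2^n. Subtracting the unbiased exponent n
 * from the exponent field leaves it at the bias (127), so the returned
 * mantissa m lies in [1, 2). Assumes normalized, positive inputs. */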
__STATIC_INLINE f32x4_t vlogq_f32(f32x4_t vecIn)
{
    q31x4_t         vecExpUnBiased;
    f32x4_t         vecTmpFlt0, vecTmpFlt1;
    f32x4_t         vecAcc0, vecAcc1, vecAcc2, vecAcc3;
    f32x4_t         vecExpUnBiasedFlt;

    /*
     * extract exponent
     */
    vecTmpFlt1 = vmant_exp_f32(vecIn, &vecExpUnBiased);

    vecTmpFlt0 = vecTmpFlt1 * vecTmpFlt1;
    /*
     * a = (__logf_lut_f32[4] * r.f) + (__logf_lut_f32[0]);
     */
    vecAcc0 = vdupq_n_f32(__logf_lut_f32[0]);
    vecAcc0 = vfmaq(vecAcc0, vecTmpFlt1, __logf_lut_f32[4]);
    /*
     * b = (__logf_lut_f32[6] * r.f) + (__logf_lut_f32[2]);
     */
    vecAcc1 = vdupq_n_f32(__logf_lut_f32[2]);
    vecAcc1 = vfmaq(vecAcc1, vecTmpFlt1, __logf_lut_f32[6]);
    /*
     * c = (__logf_lut_f32[5] * r.f) + (__logf_lut_f32[1]);
     */
    vecAcc2 = vdupq_n_f32(__logf_lut_f32[1]);
    vecAcc2 = vfmaq(vecAcc2, vecTmpFlt1, __logf_lut_f32[5]);
    /*
     * d = (__logf_lut_f32[7] * r.f) + (__logf_lut_f32[3]);
     */
    vecAcc3 = vdupq_n_f32(__logf_lut_f32[3]);
    vecAcc3 = vfmaq(vecAcc3, vecTmpFlt1, __logf_lut_f32[7]);
    /*
     * a = a + b * xx;
     */
    vecAcc0 = vfmaq(vecAcc0, vecAcc1, vecTmpFlt0);
    /*
     * c = c + d * xx;
     */
    vecAcc2 = vfmaq(vecAcc2, vecAcc3, vecTmpFlt0);
    /*
     * xx = xx * xx;
     */
    vecTmpFlt0 = vecTmpFlt0 * vecTmpFlt0;
    vecExpUnBiasedFlt = vcvtq_f32_s32(vecExpUnBiased);
    /*
     * r.f = a + c * xx;
     */
    vecAcc0 = vfmaq(vecAcc0, vecAcc2, vecTmpFlt0);
    /*
     * add exponent
     * r.f = r.f + ((float32_t) m) * __logf_rng_f32;
     */
    vecAcc0 = vfmaq(vecAcc0, vecExpUnBiasedFlt, __logf_rng_f32);

    // set log0 down to -inf
    vecAcc0 = vdupq_m(vecAcc0, -INFINITY, vcmpeqq(vecIn, 0.0f));

    return vecAcc0;
}
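/* Note: the overall scheme is log(x) = P(m) + n * ln(2), where x = m * 2^n
 * comes from vmant_exp_f32() and P is the polynomial in the __logf_lut_f32
 * coefficients, evaluated with the same Estrin-style FMA grouping as
 * vtaylor_polyq_f32(). Negative inputs are not handled specially; only exact
 * zero is forced to -inf. */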
__STATIC_INLINE f32x4_t vexpq_f32(
    f32x4_t x)
{
    // Perform range reduction [-log(2),log(2)]
    int32x4_t       m = vcvtq_s32_f32(vmulq_n_f32(x, 1.4426950408f));
    f32x4_t         val = vfmsq_f32(x, vcvtq_f32_s32(m), vdupq_n_f32(0.6931471805f));

    // Polynomial Approximation
    f32x4_t         poly = vtaylor_polyq_f32(val, exp_tab);

    // Reconstruct
    poly = (f32x4_t) (vqaddq_s32((q31x4_t) (poly), vqshlq_n_s32(m, 23)));
    poly = vdupq_m(poly, 0.0f, vcmpltq_n_s32(m, -126));

    return poly;
}
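/* Note: reconstruction computes e^x = 2^m * e^val by adding m, shifted into
 * the exponent field with vqshlq_n_s32(m, 23), to the bit pattern of the
 * polynomial result (saturating add). Lanes with m < -126 would be subnormal
 * and are flushed to zero. */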
__STATIC_INLINE f32x4_t arm_vec_exponent_f32(f32x4_t x, int32_t nb)
{
    f32x4_t         r = x;
    nb--;
    while (nb > 0) {
        r = vmulq(r, x);
        nb--;
    }
    return (r);
}
__STATIC_INLINE f32x4_t vrecip_f32(f32x4_t vecIn)
{
    f32x4_t         vecSx, vecW, vecTmp;
    any32x4_t       v;

    vecSx = vabsq(vecIn);

    v.f = vecIn;
    v.i = vsubq(vdupq_n_s32(INV_NEWTON_INIT_F32), v.i);

    vecW = vmulq(vecSx, v.f);

    // v.f = v.f * (8 + w * (-28 + w * (56 + w * (-70 + w * (56 + w * (-28 + w * (8 - w)))))));
    vecTmp = vsubq(vdupq_n_f32(8.0f), vecW);
    vecTmp = vfmasq(vecW, vecTmp, -28.0f);
    vecTmp = vfmasq(vecW, vecTmp, 56.0f);
    vecTmp = vfmasq(vecW, vecTmp, -70.0f);
    vecTmp = vfmasq(vecW, vecTmp, 56.0f);
    vecTmp = vfmasq(vecW, vecTmp, -28.0f);
    vecTmp = vfmasq(vecW, vecTmp, 8.0f);
    v.f = vmulq(v.f, vecTmp);

    v.f = vdupq_m(v.f, INFINITY, vcmpeqq(vecIn, 0.0f));
    /*
     * restore sign
     */
    v.f = vnegq_m(v.f, v.f, vcmpltq(vecIn, 0.0f));

    return v.f;
}
__STATIC_INLINE f32x4_t vtanhq_f32(
    f32x4_t val)
{
    f32x4_t         x =
        vminnmq_f32(vmaxnmq_f32(val, vdupq_n_f32(-10.f)), vdupq_n_f32(10.0f));
    f32x4_t         exp2x = vexpq_f32(vmulq_n_f32(x, 2.f));
    f32x4_t         num = vsubq_n_f32(exp2x, 1.f);
    f32x4_t         den = vaddq_n_f32(exp2x, 1.f);
    f32x4_t         tanh = vmulq_f32(num, vrecip_f32(den));
    return tanh;
}
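/* Note: uses the identity tanh(x) = (e^(2x) - 1) / (e^(2x) + 1). The input is
 * clamped to [-10, 10] first, where tanh already saturates to +/-1 within
 * single-precision accuracy, keeping vexpq_f32() in a safe range. */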
__STATIC_INLINE f32x4_t vpowq_f32(
    f32x4_t val,
    f32x4_t n)
{
    return vexpq_f32(vmulq_f32(n, vlogq_f32(val)));
}
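/* Note: computes val^n as e^(n * log(val)), so this path is only meaningful
 * for val > 0. */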
#endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */

#if (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
#endif /* (defined(ARM_MATH_MVEI) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) */
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)

#include "NEMath.h"
/**
 * @brief Vectorized integer exponentiation
 * @param[in]    x           value
 * @param[in]    nb          integer exponent >= 1
 * @return       x^nb
 *
 */
__STATIC_INLINE float32x4_t arm_vec_exponent_f32(float32x4_t x, int32_t nb)
{
    float32x4_t r = x;
    nb--;
    while (nb > 0)
    {
        r = vmulq_f32(r, x);
        nb--;
    }
    return (r);
}
__STATIC_INLINE float32x4_t __arm_vec_sqrt_f32_neon(float32x4_t x)
{
    float32x4_t x1 = vmaxq_f32(x, vdupq_n_f32(FLT_MIN));
    float32x4_t e = vrsqrteq_f32(x1);
    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
    return vmulq_f32(x, e);
}
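/* Note: computes sqrt(x) as x * (1/sqrt(x)). vrsqrteq_f32 gives a coarse
 * reciprocal-square-root estimate and each vrsqrtsq_f32/vmulq_f32 pair is one
 * Newton-Raphson refinement. Clamping to FLT_MIN keeps the estimate finite for
 * x == 0, where the final multiply by x still returns 0. */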
__STATIC_INLINE int16x8_t __arm_vec_sqrt_q15_neon(int16x8_t vec)
{
    float32x4_t tempF;
    int32x4_t tempHI, tempLO;

    tempLO = vmovl_s16(vget_low_s16(vec));
    tempF = vcvtq_n_f32_s32(tempLO, 15);
    tempF = __arm_vec_sqrt_f32_neon(tempF);
    tempLO = vcvtq_n_s32_f32(tempF, 15);

    tempHI = vmovl_s16(vget_high_s16(vec));
    tempF = vcvtq_n_f32_s32(tempHI, 15);
    tempF = __arm_vec_sqrt_f32_neon(tempF);
    tempHI = vcvtq_n_s32_f32(tempF, 15);

    return (vcombine_s16(vqmovn_s32(tempLO), vqmovn_s32(tempHI)));
}
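/* Note: each Q15 half is widened to int32, converted to float with 15
 * fractional bits (vcvtq_n_f32_s32), square-rooted in float, converted back,
 * and saturate-narrowed with vqmovn_s32. The Q31 variant below follows the
 * same pattern with 31 fractional bits. */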
__STATIC_INLINE int32x4_t __arm_vec_sqrt_q31_neon(int32x4_t vec)
{
    float32x4_t temp;

    temp = vcvtq_n_f32_s32(vec, 31);
    temp = __arm_vec_sqrt_f32_neon(temp);
    return (vcvtq_n_s32_f32(temp, 31));
}
#endif /* (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) */

#ifdef   __cplusplus
}
#endif

#endif /* _ARM_VEC_MATH_H */
/**
 *
 * End of file.
 */
