From 169877344c3d93785c45f48c2f8b9389ca4228a5 Mon Sep 17 00:00:00 2001 From: ClaudioMartino Date: Tue, 7 Jan 2020 11:38:36 +0100 Subject: [PATCH] Corrected spline interpolation and merge sort --- Include/arm_math.h | 43 ++++++++-- PrivateInclude/arm_sorting.h | 12 --- Source/SupportFunctions/arm_merge_sort_f32.c | 27 +++++-- .../arm_merge_sort_init_f32.c | 35 +++++++++ Source/SupportFunctions/arm_sort_f32.c | 4 - .../SupportFunctions/arm_spline_interp_f32.c | 78 ++++++++----------- .../arm_spline_interp_init_f32.c | 13 ++-- Testing/Include/Tests/SupportTestsF32.h | 7 +- Testing/Source/Tests/SupportTestsF32.cpp | 33 ++++---- 9 files changed, 154 insertions(+), 98 deletions(-) create mode 100644 Source/SupportFunctions/arm_merge_sort_init_f32.c diff --git a/Include/arm_math.h b/Include/arm_math.h index 0cd2ee7d..2cd6375a 100644 --- a/Include/arm_math.h +++ b/Include/arm_math.h @@ -2264,11 +2264,9 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den) /**< Heap sort */ ARM_SORT_INSERTION = 3, /**< Insertion sort */ - ARM_SORT_MERGE = 4, - /**< Merge sort */ - ARM_SORT_QUICK = 5, + ARM_SORT_QUICK = 4, /**< Quick sort */ - ARM_SORT_SELECTION = 6 + ARM_SORT_SELECTION = 5 /**< Selection sort */ } arm_sort_alg; @@ -2314,6 +2312,37 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den) arm_sort_alg alg, arm_sort_dir dir); + /** + * @brief Instance structure for the sorting algorithms. + */ + typedef struct + { + arm_sort_dir dir; /**< Sorting order (direction) */ + float32_t * buffer; /**< Working buffer */ + } arm_merge_sort_instance_f32; + + /** + * @param[in] S points to an instance of the sorting structure. + * @param[in,out] pSrc points to the block of input data. + * @param[out] pDst points to the block of output data + * @param[in] blockSize number of samples to process. + */ + void arm_merge_sort_f32( + const arm_merge_sort_instance_f32 * S, + float32_t *pSrc, + float32_t *pDst, + uint32_t blockSize); + + /** + * @param[in,out] S points to an instance of the sorting structure. + * @param[in] dir Sorting order. + * @param[in] buffer Working buffer. + */ + void arm_merge_sort_init_f32( + arm_merge_sort_instance_f32 * S, + arm_sort_dir dir, + float32_t * buffer); + /** * @brief Struct for specifying cubic spline type */ @@ -2330,6 +2359,7 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den) { uint32_t n_x; /**< Number of known data points */ arm_spline_type type; /**< Type (boundary conditions) */ + float32_t * buffer; } arm_spline_instance_f32; /** @@ -2347,7 +2377,7 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den) const float32_t * y, const float32_t * xq, float32_t * pDst, - uint32_t blockSize); + uint32_t blockSize); /** * @brief Initialization function for the floating-point cubic spline interpolation. @@ -2358,7 +2388,8 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den) void arm_spline_init_f32( arm_spline_instance_f32 * S, uint32_t n, - arm_spline_type type); + arm_spline_type type, + float32_t * buffer); /** * @brief Instance structure for the floating-point matrix structure. diff --git a/PrivateInclude/arm_sorting.h b/PrivateInclude/arm_sorting.h index 4a405402..ec002b2f 100644 --- a/PrivateInclude/arm_sorting.h +++ b/PrivateInclude/arm_sorting.h @@ -68,18 +68,6 @@ extern "C" float32_t* pDst, uint32_t blockSize); - /** - * @param[in] S points to an instance of the sorting structure. - * @param[in] pSrc points to the block of input data. - * @param[out] pDst points to the block of output data - * @param[in] blockSize number of samples to process. - */ - void arm_merge_sort_f32( - const arm_sort_instance_f32 * S, - float32_t *pSrc, - float32_t *pDst, - uint32_t blockSize); - /** * @param[in] S points to an instance of the sorting structure. * @param[in] pSrc points to the block of input data. diff --git a/Source/SupportFunctions/arm_merge_sort_f32.c b/Source/SupportFunctions/arm_merge_sort_f32.c index 7e3180b8..2e026a39 100644 --- a/Source/SupportFunctions/arm_merge_sort_f32.c +++ b/Source/SupportFunctions/arm_merge_sort_f32.c @@ -92,24 +92,35 @@ static void arm_merge_sort_core_f32(float32_t * pB, uint32_t begin, uint32_t end * divide the input array in sublists and merge them to produce * longer sorted sublists until there is only one list remaining. * - * @par A work array is always needed, hence pSrc and pDst cannot be - * equal and the results will be stored in pDst. + * @par A work array is always needed. It must be allocated by the user + * linked to the instance at initialization time. + * + * @par It's an in-place algorithm. In order to obtain an out-of-place + * function, a memcpy of the source vector is performed */ void arm_merge_sort_f32( - const arm_sort_instance_f32 * S, + const arm_merge_sort_instance_f32 * S, float32_t *pSrc, float32_t *pDst, uint32_t blockSize) { - // A work array is needed: pDst - if(pSrc != pDst) // out-of-place only - { - memcpy(pDst, pSrc, blockSize*sizeof(float32_t)); + float32_t * pA; - arm_merge_sort_core_f32(pSrc, 0, blockSize, pDst, S->dir); + /* Out-of-place */ + if(pSrc != pDst) + { + memcpy(pDst, pSrc, blockSize*sizeof(float32_t)); + pA = pDst; } + else + pA = pSrc; + + /* A working buffer is needed */ + memcpy(S->buffer, pSrc, blockSize*sizeof(float32_t)); + + arm_merge_sort_core_f32(S->buffer, 0, blockSize, pA, S->dir); } /** @} end of Sorting group diff --git a/Source/SupportFunctions/arm_merge_sort_init_f32.c b/Source/SupportFunctions/arm_merge_sort_init_f32.c new file mode 100644 index 00000000..3a2ff07c --- /dev/null +++ b/Source/SupportFunctions/arm_merge_sort_init_f32.c @@ -0,0 +1,35 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_merge_sort_init_f32.c + * Description: Floating point merge sort initialization function + * + * $Date: 2019 + * $Revision: V1.6.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +void arm_merge_sort_init_f32(arm_merge_sort_instance_f32 * S, arm_sort_dir dir, float32_t * buffer) +{ + S->dir = dir; + S->buffer = buffer; +} diff --git a/Source/SupportFunctions/arm_sort_f32.c b/Source/SupportFunctions/arm_sort_f32.c index fea7aad4..bb1cf7e4 100644 --- a/Source/SupportFunctions/arm_sort_f32.c +++ b/Source/SupportFunctions/arm_sort_f32.c @@ -66,10 +66,6 @@ void arm_sort_f32( arm_insertion_sort_f32(S, pSrc, pDst, blockSize); break; - case ARM_SORT_MERGE: - arm_merge_sort_f32(S, pSrc, pDst, blockSize); - break; - case ARM_SORT_QUICK: arm_quick_sort_f32(S, pSrc, pDst, blockSize); break; diff --git a/Source/SupportFunctions/arm_spline_interp_f32.c b/Source/SupportFunctions/arm_spline_interp_f32.c index e3a8a375..2954b570 100644 --- a/Source/SupportFunctions/arm_spline_interp_f32.c +++ b/Source/SupportFunctions/arm_spline_interp_f32.c @@ -163,28 +163,16 @@ void arm_spline_f32( const float32_t * y, const float32_t * xq, float32_t * pDst, - uint32_t blockSize) + uint32_t blockSize) { - - /* - - As explained in arm_spline_interp_init_f32.c, this must be > 1 - - */ int32_t n = S->n_x; arm_spline_type type = S->type; float32_t hi, hm1; - - /* - - Temporary variables for system AX=B. - This will be replaced by new arguments for the function. - - */ - float32_t u[MAX_DATA_POINTS], z[MAX_DATA_POINTS], c[MAX_DATA_POINTS]; + float32_t * u = (S->buffer); /* (n-1)-long buffer for u elements */ + float32_t * z = (S->buffer)+(n-1); /* n-long buffer for z elements */ + float32_t * c = (S->buffer)+(n+n-1); /* n-long buffer for c elements */ float32_t Bi, li; - float32_t bi, di; float32_t x_sc; @@ -231,7 +219,7 @@ void arm_spline_f32( hi = x[i+1]-x[i]; Bi = 3*(y[i+1]-y[i])/hi - 3*(y[i]-y[i-1])/hm1; /* l(i) = a(ii)-a(i,i-1)*u(i-1) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */ - li = 2*(hi+hm1) - hm1*u[i-1]; + li = 2*(hi+hm1) - hm1*u[i-1]; /* u(i) = a(i,i+1)/l(i) = h(i)/l(i) */ u[i] = hi/li; /* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */ @@ -264,12 +252,12 @@ void arm_spline_f32( /* === Compute b(i) and d(i) from c(i) and create output for x(i) 4 ) - { - /* Load [xq(k) xq(k+1) xq(k+2) xq(k+3)] */ + { + /* Load [xq(k) xq(k+1) xq(k+2) xq(k+3)] */ xqv = vld1q_f32(pXq); - pXq+=4; - - /* Compute [xq(k)-x(i) xq(k+1)-x(i) xq(k+2)-x(i) xq(k+3)-x(i)] */ + pXq+=4; + + /* Compute [xq(k)-x(i) xq(k+1)-x(i) xq(k+2)-x(i) xq(k+3)-x(i)] */ diff = vsubq_f32(xqv, xiv); - temp = diff; - - /* y(i) = a(i) + ... */ + temp = diff; + + /* y(i) = a(i) + ... */ yv = aiv; - /* ... + b(i)*(x-x(i)) + ... */ - yv = vmlaq_f32(yv, biv, temp); - /* ... + c(i)*(x-x(i))^2 + ... */ - temp = vmulq_f32(temp, diff); - yv = vmlaq_f32(yv, civ, temp); + /* ... + b(i)*(x-x(i)) + ... */ + yv = vmlaq_f32(yv, biv, temp); + /* ... + c(i)*(x-x(i))^2 + ... */ + temp = vmulq_f32(temp, diff); + yv = vmlaq_f32(yv, civ, temp); /* ... + d(i)*(x-x(i))^3 */ - temp = vmulq_f32(temp, diff); - yv = vmlaq_f32(yv, div, temp); - + temp = vmulq_f32(temp, diff); + yv = vmlaq_f32(yv, div, temp); + /* Store [y(k) y(k+1) y(k+2) y(k+3)] */ - vst1q_f32(pDst, yv); - pDst+=4; - - blkCnt-=4; - } + vst1q_f32(pDst, yv); + pDst+=4; + + blkCnt-=4; + } #endif - while( *pXq <= x[i+1] && blkCnt > 0 ) - { - x_sc = *pXq++; + while( *pXq <= x[i+1] && blkCnt > 0 ) + { + x_sc = *pXq++; *pDst = y[i]+bi*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+di*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]); pDst++; - blkCnt--; - } + blkCnt--; + } } /* == Create output for remaining samples (x>=x(n)) == */ diff --git a/Source/SupportFunctions/arm_spline_interp_init_f32.c b/Source/SupportFunctions/arm_spline_interp_init_f32.c index e014f201..d5065d92 100644 --- a/Source/SupportFunctions/arm_spline_interp_init_f32.c +++ b/Source/SupportFunctions/arm_spline_interp_init_f32.c @@ -47,15 +47,16 @@ void arm_spline_init_f32( arm_spline_instance_f32 * S, uint32_t n, - arm_spline_type type) + arm_spline_type type, + float32_t * buffer) { - S->n_x = n; - S->type = type; - + S->n_x = n; + S->type = type; /* Type (boundary conditions): - * 0: Natural spline ( S1''(x1)=0 ; Sn''(xn)=0 ) - * 1: Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) ) + * - Natural spline ( S1''(x1)=0 ; Sn''(xn)=0 ) + * - Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) ) */ + S->buffer = buffer; } /** diff --git a/Testing/Include/Tests/SupportTestsF32.h b/Testing/Include/Tests/SupportTestsF32.h index 2830f02f..0fe693cd 100755 --- a/Testing/Include/Tests/SupportTestsF32.h +++ b/Testing/Include/Tests/SupportTestsF32.h @@ -12,9 +12,10 @@ class SupportTestsF32:public Client::Suite Client::Pattern input; Client::Pattern coefs; - Client::Pattern inputX; - Client::Pattern inputY; - Client::Pattern outputX; + Client::Pattern inputX; + Client::Pattern inputY; + Client::Pattern outputX; + Client::Pattern buffer; Client::LocalPattern output; Client::LocalPattern outputQ15; diff --git a/Testing/Source/Tests/SupportTestsF32.cpp b/Testing/Source/Tests/SupportTestsF32.cpp index 58468372..8f9ea251 100755 --- a/Testing/Source/Tests/SupportTestsF32.cpp +++ b/Testing/Source/Tests/SupportTestsF32.cpp @@ -297,11 +297,12 @@ { float32_t *inp = input.ptr(); float32_t *outp = output.ptr(); - arm_sort_instance_f32 S; - - arm_sort_init_f32(&S, ARM_SORT_MERGE, ARM_SORT_ASCENDING); + float32_t *buf = buffer.ptr(); + buf = (float32_t *)malloc((this->nbSamples)*sizeof(float32_t) ); + arm_merge_sort_instance_f32 S; - arm_sort_f32(&S,inp,outp,this->nbSamples); + arm_merge_sort_init_f32(&S, ARM_SORT_ASCENDING, buf); + arm_merge_sort_f32(&S,inp,outp,this->nbSamples); ASSERT_EMPTY_TAIL(output); @@ -313,11 +314,12 @@ { float32_t *inp = input.ptr(); float32_t *outp = output.ptr(); - arm_sort_instance_f32 S; - - arm_sort_init_f32(&S, ARM_SORT_MERGE, ARM_SORT_ASCENDING); + float32_t *buf = buffer.ptr(); + buf = (float32_t *)malloc((this->nbSamples)*sizeof(float32_t) ); + arm_merge_sort_instance_f32 S; - arm_sort_f32(&S,inp,outp,this->nbSamples); + arm_merge_sort_init_f32(&S, ARM_SORT_ASCENDING, buf); + arm_merge_sort_f32(&S,inp,outp,this->nbSamples); ASSERT_EMPTY_TAIL(output); @@ -424,10 +426,11 @@ const float32_t *inpY = inputY.ptr(); const float32_t *outX = outputX.ptr(); float32_t *outp = output.ptr(); - + float32_t *buf = buffer.ptr(); + buf=(float32_t*)malloc((3*4-1)*sizeof(float32_t)); arm_spline_instance_f32 S; - arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT); + arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT, buf); arm_spline_f32(&S, inpX, inpY, outX, outp, 20); ASSERT_EMPTY_TAIL(output); @@ -440,10 +443,11 @@ const float32_t *inpY = inputY.ptr(); const float32_t *outX = outputX.ptr(); float32_t *outp = output.ptr(); - + float32_t *buf = buffer.ptr(); + buf=(float32_t*)malloc((3*9-1)*sizeof(float32_t)); arm_spline_instance_f32 S; - arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL); + arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL, buf); arm_spline_f32(&S, inpX, inpY, outX, outp, 33); ASSERT_EMPTY_TAIL(output); @@ -456,10 +460,11 @@ const float32_t *inpY = inputY.ptr(); const float32_t *outX = outputX.ptr(); float32_t *outp = output.ptr(); - + float32_t *buf = buffer.ptr(); + buf=(float32_t*)malloc((3*3-1)*sizeof(float32_t)); arm_spline_instance_f32 S; - arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT); + arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT, buf); arm_spline_f32(&S, inpX, inpY, outX, outp, 30); ASSERT_EMPTY_TAIL(output);