Corrected spline interpolation and merge sort

pull/19/head
ClaudioMartino 6 years ago committed by Christophe Favergeon
parent 76153d838d
commit 169877344c

@ -2264,11 +2264,9 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
/**< Heap sort */ /**< Heap sort */
ARM_SORT_INSERTION = 3, ARM_SORT_INSERTION = 3,
/**< Insertion sort */ /**< Insertion sort */
ARM_SORT_MERGE = 4, ARM_SORT_QUICK = 4,
/**< Merge sort */
ARM_SORT_QUICK = 5,
/**< Quick sort */ /**< Quick sort */
ARM_SORT_SELECTION = 6 ARM_SORT_SELECTION = 5
/**< Selection sort */ /**< Selection sort */
} arm_sort_alg; } arm_sort_alg;
@ -2314,6 +2312,37 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
arm_sort_alg alg, arm_sort_alg alg,
arm_sort_dir dir); arm_sort_dir dir);
/**
* @brief Instance structure for the sorting algorithms.
*/
typedef struct
{
arm_sort_dir dir; /**< Sorting order (direction) */
float32_t * buffer; /**< Working buffer */
} arm_merge_sort_instance_f32;
/**
* @param[in] S points to an instance of the sorting structure.
* @param[in,out] pSrc points to the block of input data.
* @param[out] pDst points to the block of output data
* @param[in] blockSize number of samples to process.
*/
void arm_merge_sort_f32(
const arm_merge_sort_instance_f32 * S,
float32_t *pSrc,
float32_t *pDst,
uint32_t blockSize);
/**
* @param[in,out] S points to an instance of the sorting structure.
* @param[in] dir Sorting order.
* @param[in] buffer Working buffer.
*/
void arm_merge_sort_init_f32(
arm_merge_sort_instance_f32 * S,
arm_sort_dir dir,
float32_t * buffer);
/** /**
* @brief Struct for specifying cubic spline type * @brief Struct for specifying cubic spline type
*/ */
@ -2330,6 +2359,7 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
{ {
uint32_t n_x; /**< Number of known data points */ uint32_t n_x; /**< Number of known data points */
arm_spline_type type; /**< Type (boundary conditions) */ arm_spline_type type; /**< Type (boundary conditions) */
float32_t * buffer;
} arm_spline_instance_f32; } arm_spline_instance_f32;
/** /**
@ -2347,7 +2377,7 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
const float32_t * y, const float32_t * y,
const float32_t * xq, const float32_t * xq,
float32_t * pDst, float32_t * pDst,
uint32_t blockSize); uint32_t blockSize);
/** /**
* @brief Initialization function for the floating-point cubic spline interpolation. * @brief Initialization function for the floating-point cubic spline interpolation.
@ -2358,7 +2388,8 @@ __STATIC_INLINE q31_t arm_div_q63_to_q31(q63_t num, q31_t den)
void arm_spline_init_f32( void arm_spline_init_f32(
arm_spline_instance_f32 * S, arm_spline_instance_f32 * S,
uint32_t n, uint32_t n,
arm_spline_type type); arm_spline_type type,
float32_t * buffer);
/** /**
* @brief Instance structure for the floating-point matrix structure. * @brief Instance structure for the floating-point matrix structure.

@ -68,18 +68,6 @@ extern "C"
float32_t* pDst, float32_t* pDst,
uint32_t blockSize); uint32_t blockSize);
/**
* @param[in] S points to an instance of the sorting structure.
* @param[in] pSrc points to the block of input data.
* @param[out] pDst points to the block of output data
* @param[in] blockSize number of samples to process.
*/
void arm_merge_sort_f32(
const arm_sort_instance_f32 * S,
float32_t *pSrc,
float32_t *pDst,
uint32_t blockSize);
/** /**
* @param[in] S points to an instance of the sorting structure. * @param[in] S points to an instance of the sorting structure.
* @param[in] pSrc points to the block of input data. * @param[in] pSrc points to the block of input data.

@ -92,24 +92,35 @@ static void arm_merge_sort_core_f32(float32_t * pB, uint32_t begin, uint32_t end
* divide the input array in sublists and merge them to produce * divide the input array in sublists and merge them to produce
* longer sorted sublists until there is only one list remaining. * longer sorted sublists until there is only one list remaining.
* *
* @par A work array is always needed, hence pSrc and pDst cannot be * @par A work array is always needed. It must be allocated by the user
* equal and the results will be stored in pDst. * linked to the instance at initialization time.
*
* @par It's an in-place algorithm. In order to obtain an out-of-place
* function, a memcpy of the source vector is performed
*/ */
void arm_merge_sort_f32( void arm_merge_sort_f32(
const arm_sort_instance_f32 * S, const arm_merge_sort_instance_f32 * S,
float32_t *pSrc, float32_t *pSrc,
float32_t *pDst, float32_t *pDst,
uint32_t blockSize) uint32_t blockSize)
{ {
// A work array is needed: pDst float32_t * pA;
if(pSrc != pDst) // out-of-place only
{
memcpy(pDst, pSrc, blockSize*sizeof(float32_t));
arm_merge_sort_core_f32(pSrc, 0, blockSize, pDst, S->dir); /* Out-of-place */
if(pSrc != pDst)
{
memcpy(pDst, pSrc, blockSize*sizeof(float32_t));
pA = pDst;
} }
else
pA = pSrc;
/* A working buffer is needed */
memcpy(S->buffer, pSrc, blockSize*sizeof(float32_t));
arm_merge_sort_core_f32(S->buffer, 0, blockSize, pA, S->dir);
} }
/** /**
@} end of Sorting group @} end of Sorting group

@ -0,0 +1,35 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_merge_sort_init_f32.c
* Description: Floating point merge sort initialization function
*
* $Date: 2019
* $Revision: V1.6.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
void arm_merge_sort_init_f32(arm_merge_sort_instance_f32 * S, arm_sort_dir dir, float32_t * buffer)
{
S->dir = dir;
S->buffer = buffer;
}

@ -66,10 +66,6 @@ void arm_sort_f32(
arm_insertion_sort_f32(S, pSrc, pDst, blockSize); arm_insertion_sort_f32(S, pSrc, pDst, blockSize);
break; break;
case ARM_SORT_MERGE:
arm_merge_sort_f32(S, pSrc, pDst, blockSize);
break;
case ARM_SORT_QUICK: case ARM_SORT_QUICK:
arm_quick_sort_f32(S, pSrc, pDst, blockSize); arm_quick_sort_f32(S, pSrc, pDst, blockSize);
break; break;

@ -163,28 +163,16 @@ void arm_spline_f32(
const float32_t * y, const float32_t * y,
const float32_t * xq, const float32_t * xq,
float32_t * pDst, float32_t * pDst,
uint32_t blockSize) uint32_t blockSize)
{ {
/*
As explained in arm_spline_interp_init_f32.c, this must be > 1
*/
int32_t n = S->n_x; int32_t n = S->n_x;
arm_spline_type type = S->type; arm_spline_type type = S->type;
float32_t hi, hm1; float32_t hi, hm1;
float32_t * u = (S->buffer); /* (n-1)-long buffer for u elements */
/* float32_t * z = (S->buffer)+(n-1); /* n-long buffer for z elements */
float32_t * c = (S->buffer)+(n+n-1); /* n-long buffer for c elements */
Temporary variables for system AX=B.
This will be replaced by new arguments for the function.
*/
float32_t u[MAX_DATA_POINTS], z[MAX_DATA_POINTS], c[MAX_DATA_POINTS];
float32_t Bi, li; float32_t Bi, li;
float32_t bi, di; float32_t bi, di;
float32_t x_sc; float32_t x_sc;
@ -231,7 +219,7 @@ void arm_spline_f32(
hi = x[i+1]-x[i]; hi = x[i+1]-x[i];
Bi = 3*(y[i+1]-y[i])/hi - 3*(y[i]-y[i-1])/hm1; Bi = 3*(y[i+1]-y[i])/hi - 3*(y[i]-y[i-1])/hm1;
/* l(i) = a(ii)-a(i,i-1)*u(i-1) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */ /* l(i) = a(ii)-a(i,i-1)*u(i-1) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */
li = 2*(hi+hm1) - hm1*u[i-1]; li = 2*(hi+hm1) - hm1*u[i-1];
/* u(i) = a(i,i+1)/l(i) = h(i)/l(i) */ /* u(i) = a(i,i+1)/l(i) = h(i)/l(i) */
u[i] = hi/li; u[i] = hi/li;
/* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */ /* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */
@ -264,12 +252,12 @@ void arm_spline_f32(
/* === Compute b(i) and d(i) from c(i) and create output for x(i)<x<x(i+1) === */ /* === Compute b(i) and d(i) from c(i) and create output for x(i)<x<x(i+1) === */
for (i=0; i<n-1; i++) for (i=0; i<n-1; i++)
{ {
hi = x[i+1]-x[i]; hi = x[i+1]-x[i];
bi = (y[i+1]-y[i])/hi-hi*(c[i+1]+2*c[i])/3; bi = (y[i+1]-y[i])/hi-hi*(c[i+1]+2*c[i])/3;
di = (c[i+1]-c[i])/(3*hi); di = (c[i+1]-c[i])/(3*hi);
#ifdef ARM_MATH_NEON #ifdef ARM_MATH_NEON
xiv = vdupq_n_f32(x[i]); xiv = vdupq_n_f32(x[i]);
aiv = vdupq_n_f32(y[i]); aiv = vdupq_n_f32(y[i]);
biv = vdupq_n_f32(bi); biv = vdupq_n_f32(bi);
@ -277,42 +265,42 @@ void arm_spline_f32(
div = vdupq_n_f32(di); div = vdupq_n_f32(di);
while( *(pXq+4) <= x[i+1] && blkCnt > 4 ) while( *(pXq+4) <= x[i+1] && blkCnt > 4 )
{ {
/* Load [xq(k) xq(k+1) xq(k+2) xq(k+3)] */ /* Load [xq(k) xq(k+1) xq(k+2) xq(k+3)] */
xqv = vld1q_f32(pXq); xqv = vld1q_f32(pXq);
pXq+=4; pXq+=4;
/* Compute [xq(k)-x(i) xq(k+1)-x(i) xq(k+2)-x(i) xq(k+3)-x(i)] */ /* Compute [xq(k)-x(i) xq(k+1)-x(i) xq(k+2)-x(i) xq(k+3)-x(i)] */
diff = vsubq_f32(xqv, xiv); diff = vsubq_f32(xqv, xiv);
temp = diff; temp = diff;
/* y(i) = a(i) + ... */ /* y(i) = a(i) + ... */
yv = aiv; yv = aiv;
/* ... + b(i)*(x-x(i)) + ... */ /* ... + b(i)*(x-x(i)) + ... */
yv = vmlaq_f32(yv, biv, temp); yv = vmlaq_f32(yv, biv, temp);
/* ... + c(i)*(x-x(i))^2 + ... */ /* ... + c(i)*(x-x(i))^2 + ... */
temp = vmulq_f32(temp, diff); temp = vmulq_f32(temp, diff);
yv = vmlaq_f32(yv, civ, temp); yv = vmlaq_f32(yv, civ, temp);
/* ... + d(i)*(x-x(i))^3 */ /* ... + d(i)*(x-x(i))^3 */
temp = vmulq_f32(temp, diff); temp = vmulq_f32(temp, diff);
yv = vmlaq_f32(yv, div, temp); yv = vmlaq_f32(yv, div, temp);
/* Store [y(k) y(k+1) y(k+2) y(k+3)] */ /* Store [y(k) y(k+1) y(k+2) y(k+3)] */
vst1q_f32(pDst, yv); vst1q_f32(pDst, yv);
pDst+=4; pDst+=4;
blkCnt-=4; blkCnt-=4;
} }
#endif #endif
while( *pXq <= x[i+1] && blkCnt > 0 ) while( *pXq <= x[i+1] && blkCnt > 0 )
{ {
x_sc = *pXq++; x_sc = *pXq++;
*pDst = y[i]+bi*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+di*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]); *pDst = y[i]+bi*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+di*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]);
pDst++; pDst++;
blkCnt--; blkCnt--;
} }
} }
/* == Create output for remaining samples (x>=x(n)) == */ /* == Create output for remaining samples (x>=x(n)) == */

@ -47,15 +47,16 @@
void arm_spline_init_f32( void arm_spline_init_f32(
arm_spline_instance_f32 * S, arm_spline_instance_f32 * S,
uint32_t n, uint32_t n,
arm_spline_type type) arm_spline_type type,
float32_t * buffer)
{ {
S->n_x = n; S->n_x = n;
S->type = type; S->type = type;
/* Type (boundary conditions): /* Type (boundary conditions):
* 0: Natural spline ( S1''(x1)=0 ; Sn''(xn)=0 ) * - Natural spline ( S1''(x1)=0 ; Sn''(xn)=0 )
* 1: Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) ) * - Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) )
*/ */
S->buffer = buffer;
} }
/** /**

@ -12,9 +12,10 @@ class SupportTestsF32:public Client::Suite
Client::Pattern<float32_t> input; Client::Pattern<float32_t> input;
Client::Pattern<float32_t> coefs; Client::Pattern<float32_t> coefs;
Client::Pattern<float32_t> inputX; Client::Pattern<float32_t> inputX;
Client::Pattern<float32_t> inputY; Client::Pattern<float32_t> inputY;
Client::Pattern<float32_t> outputX; Client::Pattern<float32_t> outputX;
Client::Pattern<float32_t> buffer;
Client::LocalPattern<float32_t> output; Client::LocalPattern<float32_t> output;
Client::LocalPattern<q15_t> outputQ15; Client::LocalPattern<q15_t> outputQ15;

@ -297,11 +297,12 @@
{ {
float32_t *inp = input.ptr(); float32_t *inp = input.ptr();
float32_t *outp = output.ptr(); float32_t *outp = output.ptr();
arm_sort_instance_f32 S; float32_t *buf = buffer.ptr();
buf = (float32_t *)malloc((this->nbSamples)*sizeof(float32_t) );
arm_sort_init_f32(&S, ARM_SORT_MERGE, ARM_SORT_ASCENDING); arm_merge_sort_instance_f32 S;
arm_sort_f32(&S,inp,outp,this->nbSamples); arm_merge_sort_init_f32(&S, ARM_SORT_ASCENDING, buf);
arm_merge_sort_f32(&S,inp,outp,this->nbSamples);
ASSERT_EMPTY_TAIL(output); ASSERT_EMPTY_TAIL(output);
@ -313,11 +314,12 @@
{ {
float32_t *inp = input.ptr(); float32_t *inp = input.ptr();
float32_t *outp = output.ptr(); float32_t *outp = output.ptr();
arm_sort_instance_f32 S; float32_t *buf = buffer.ptr();
buf = (float32_t *)malloc((this->nbSamples)*sizeof(float32_t) );
arm_merge_sort_instance_f32 S;
arm_sort_init_f32(&S, ARM_SORT_MERGE, ARM_SORT_ASCENDING); arm_merge_sort_init_f32(&S, ARM_SORT_ASCENDING, buf);
arm_merge_sort_f32(&S,inp,outp,this->nbSamples);
arm_sort_f32(&S,inp,outp,this->nbSamples);
ASSERT_EMPTY_TAIL(output); ASSERT_EMPTY_TAIL(output);
@ -424,10 +426,11 @@
const float32_t *inpY = inputY.ptr(); const float32_t *inpY = inputY.ptr();
const float32_t *outX = outputX.ptr(); const float32_t *outX = outputX.ptr();
float32_t *outp = output.ptr(); float32_t *outp = output.ptr();
float32_t *buf = buffer.ptr();
buf=(float32_t*)malloc((3*4-1)*sizeof(float32_t));
arm_spline_instance_f32 S; arm_spline_instance_f32 S;
arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT); arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT, buf);
arm_spline_f32(&S, inpX, inpY, outX, outp, 20); arm_spline_f32(&S, inpX, inpY, outX, outp, 20);
ASSERT_EMPTY_TAIL(output); ASSERT_EMPTY_TAIL(output);
@ -440,10 +443,11 @@
const float32_t *inpY = inputY.ptr(); const float32_t *inpY = inputY.ptr();
const float32_t *outX = outputX.ptr(); const float32_t *outX = outputX.ptr();
float32_t *outp = output.ptr(); float32_t *outp = output.ptr();
float32_t *buf = buffer.ptr();
buf=(float32_t*)malloc((3*9-1)*sizeof(float32_t));
arm_spline_instance_f32 S; arm_spline_instance_f32 S;
arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL); arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL, buf);
arm_spline_f32(&S, inpX, inpY, outX, outp, 33); arm_spline_f32(&S, inpX, inpY, outX, outp, 33);
ASSERT_EMPTY_TAIL(output); ASSERT_EMPTY_TAIL(output);
@ -456,10 +460,11 @@
const float32_t *inpY = inputY.ptr(); const float32_t *inpY = inputY.ptr();
const float32_t *outX = outputX.ptr(); const float32_t *outX = outputX.ptr();
float32_t *outp = output.ptr(); float32_t *outp = output.ptr();
float32_t *buf = buffer.ptr();
buf=(float32_t*)malloc((3*3-1)*sizeof(float32_t));
arm_spline_instance_f32 S; arm_spline_instance_f32 S;
arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT); arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT, buf);
arm_spline_f32(&S, inpX, inpY, outX, outp, 30); arm_spline_f32(&S, inpX, inpY, outX, outp, 30);
ASSERT_EMPTY_TAIL(output); ASSERT_EMPTY_TAIL(output);

Loading…
Cancel
Save