Corrected spline interpolation and merge sort
diff --git a/CMSIS/DSP/Include/arm_math.h b/CMSIS/DSP/Include/arm_math.h
index 0cd2ee7..2cd6375 100644
--- a/CMSIS/DSP/Include/arm_math.h
+++ b/CMSIS/DSP/Include/arm_math.h
@@ -2264,11 +2264,9 @@
/**< Heap sort */
ARM_SORT_INSERTION = 3,
/**< Insertion sort */
- ARM_SORT_MERGE = 4,
- /**< Merge sort */
- ARM_SORT_QUICK = 5,
+ ARM_SORT_QUICK = 4,
/**< Quick sort */
- ARM_SORT_SELECTION = 6
+ ARM_SORT_SELECTION = 5
/**< Selection sort */
} arm_sort_alg;
@@ -2315,6 +2313,37 @@
arm_sort_dir dir);
/**
+ * @brief Instance structure for the sorting algorithms.
+ */
+ typedef struct
+ {
+ arm_sort_dir dir; /**< Sorting order (direction) */
+ float32_t * buffer; /**< Working buffer */
+ } arm_merge_sort_instance_f32;
+
+ /**
+ * @param[in] S points to an instance of the sorting structure.
+ * @param[in,out] pSrc points to the block of input data.
+ * @param[out] pDst points to the block of output data
+ * @param[in] blockSize number of samples to process.
+ */
+ void arm_merge_sort_f32(
+ const arm_merge_sort_instance_f32 * S,
+ float32_t *pSrc,
+ float32_t *pDst,
+ uint32_t blockSize);
+
+ /**
+ * @param[in,out] S points to an instance of the sorting structure.
+ * @param[in] dir Sorting order.
+ * @param[in] buffer Working buffer.
+ */
+ void arm_merge_sort_init_f32(
+ arm_merge_sort_instance_f32 * S,
+ arm_sort_dir dir,
+ float32_t * buffer);
+
+ /**
* @brief Struct for specifying cubic spline type
*/
typedef enum
@@ -2330,6 +2359,7 @@
{
uint32_t n_x; /**< Number of known data points */
arm_spline_type type; /**< Type (boundary conditions) */
+ float32_t * buffer;
} arm_spline_instance_f32;
/**
@@ -2347,7 +2377,7 @@
const float32_t * y,
const float32_t * xq,
float32_t * pDst,
- uint32_t blockSize);
+ uint32_t blockSize);
/**
* @brief Initialization function for the floating-point cubic spline interpolation.
@@ -2358,7 +2388,8 @@
void arm_spline_init_f32(
arm_spline_instance_f32 * S,
uint32_t n,
- arm_spline_type type);
+ arm_spline_type type,
+ float32_t * buffer);
/**
* @brief Instance structure for the floating-point matrix structure.
diff --git a/CMSIS/DSP/PrivateInclude/arm_sorting.h b/CMSIS/DSP/PrivateInclude/arm_sorting.h
index 4a40540..ec002b2 100644
--- a/CMSIS/DSP/PrivateInclude/arm_sorting.h
+++ b/CMSIS/DSP/PrivateInclude/arm_sorting.h
@@ -74,18 +74,6 @@
* @param[out] pDst points to the block of output data
* @param[in] blockSize number of samples to process.
*/
- void arm_merge_sort_f32(
- const arm_sort_instance_f32 * S,
- float32_t *pSrc,
- float32_t *pDst,
- uint32_t blockSize);
-
- /**
- * @param[in] S points to an instance of the sorting structure.
- * @param[in] pSrc points to the block of input data.
- * @param[out] pDst points to the block of output data
- * @param[in] blockSize number of samples to process.
- */
void arm_quick_sort_f32(
const arm_sort_instance_f32 * S,
float32_t * pSrc,
diff --git a/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_f32.c b/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_f32.c
index 7e3180b..2e026a3 100644
--- a/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_f32.c
+++ b/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_f32.c
@@ -92,24 +92,35 @@
* divide the input array in sublists and merge them to produce
* longer sorted sublists until there is only one list remaining.
*
- * @par A work array is always needed, hence pSrc and pDst cannot be
- * equal and the results will be stored in pDst.
+ * @par A work array is always needed. It must be allocated by the user
+ * linked to the instance at initialization time.
+ *
+ * @par It's an in-place algorithm. In order to obtain an out-of-place
+ * function, a memcpy of the source vector is performed
*/
void arm_merge_sort_f32(
- const arm_sort_instance_f32 * S,
+ const arm_merge_sort_instance_f32 * S,
float32_t *pSrc,
float32_t *pDst,
uint32_t blockSize)
{
- // A work array is needed: pDst
- if(pSrc != pDst) // out-of-place only
- {
- memcpy(pDst, pSrc, blockSize*sizeof(float32_t));
+ float32_t * pA;
- arm_merge_sort_core_f32(pSrc, 0, blockSize, pDst, S->dir);
+ /* Out-of-place */
+ if(pSrc != pDst)
+ {
+ memcpy(pDst, pSrc, blockSize*sizeof(float32_t));
+ pA = pDst;
}
+ else
+ pA = pSrc;
+
+ /* A working buffer is needed */
+ memcpy(S->buffer, pSrc, blockSize*sizeof(float32_t));
+
+ arm_merge_sort_core_f32(S->buffer, 0, blockSize, pA, S->dir);
}
/**
@} end of Sorting group
diff --git a/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_init_f32.c b/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_init_f32.c
new file mode 100644
index 0000000..3a2ff07
--- /dev/null
+++ b/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_init_f32.c
@@ -0,0 +1,35 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_merge_sort_init_f32.c
+ * Description: Floating point merge sort initialization function
+ *
+ * $Date: 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+void arm_merge_sort_init_f32(arm_merge_sort_instance_f32 * S, arm_sort_dir dir, float32_t * buffer)
+{
+ S->dir = dir;
+ S->buffer = buffer;
+}
diff --git a/CMSIS/DSP/Source/SupportFunctions/arm_sort_f32.c b/CMSIS/DSP/Source/SupportFunctions/arm_sort_f32.c
index fea7aad..bb1cf7e 100644
--- a/CMSIS/DSP/Source/SupportFunctions/arm_sort_f32.c
+++ b/CMSIS/DSP/Source/SupportFunctions/arm_sort_f32.c
@@ -66,10 +66,6 @@
arm_insertion_sort_f32(S, pSrc, pDst, blockSize);
break;
- case ARM_SORT_MERGE:
- arm_merge_sort_f32(S, pSrc, pDst, blockSize);
- break;
-
case ARM_SORT_QUICK:
arm_quick_sort_f32(S, pSrc, pDst, blockSize);
break;
diff --git a/CMSIS/DSP/Source/SupportFunctions/arm_spline_interp_f32.c b/CMSIS/DSP/Source/SupportFunctions/arm_spline_interp_f32.c
index e3a8a37..2954b57 100644
--- a/CMSIS/DSP/Source/SupportFunctions/arm_spline_interp_f32.c
+++ b/CMSIS/DSP/Source/SupportFunctions/arm_spline_interp_f32.c
@@ -163,28 +163,16 @@
const float32_t * y,
const float32_t * xq,
float32_t * pDst,
- uint32_t blockSize)
+ uint32_t blockSize)
{
-
- /*
-
- As explained in arm_spline_interp_init_f32.c, this must be > 1
-
- */
int32_t n = S->n_x;
arm_spline_type type = S->type;
float32_t hi, hm1;
-
- /*
-
- Temporary variables for system AX=B.
- This will be replaced by new arguments for the function.
-
- */
- float32_t u[MAX_DATA_POINTS], z[MAX_DATA_POINTS], c[MAX_DATA_POINTS];
+ float32_t * u = (S->buffer); /* (n-1)-long buffer for u elements */
+ float32_t * z = (S->buffer)+(n-1); /* n-long buffer for z elements */
+ float32_t * c = (S->buffer)+(n+n-1); /* n-long buffer for c elements */
float32_t Bi, li;
-
float32_t bi, di;
float32_t x_sc;
@@ -231,7 +219,7 @@
hi = x[i+1]-x[i];
Bi = 3*(y[i+1]-y[i])/hi - 3*(y[i]-y[i-1])/hm1;
/* l(i) = a(ii)-a(i,i-1)*u(i-1) = 2[h(i-1)+h(i)]-h(i-1)*u(i-1) */
- li = 2*(hi+hm1) - hm1*u[i-1];
+ li = 2*(hi+hm1) - hm1*u[i-1];
/* u(i) = a(i,i+1)/l(i) = h(i)/l(i) */
u[i] = hi/li;
/* z(i) = [B(i)-h(i-1)*z(i-1)]/l(i) */
@@ -264,12 +252,12 @@
/* === Compute b(i) and d(i) from c(i) and create output for x(i)<x<x(i+1) === */
for (i=0; i<n-1; i++)
{
- hi = x[i+1]-x[i];
+ hi = x[i+1]-x[i];
bi = (y[i+1]-y[i])/hi-hi*(c[i+1]+2*c[i])/3;
di = (c[i+1]-c[i])/(3*hi);
#ifdef ARM_MATH_NEON
- xiv = vdupq_n_f32(x[i]);
+ xiv = vdupq_n_f32(x[i]);
aiv = vdupq_n_f32(y[i]);
biv = vdupq_n_f32(bi);
@@ -277,42 +265,42 @@
div = vdupq_n_f32(di);
while( *(pXq+4) <= x[i+1] && blkCnt > 4 )
- {
- /* Load [xq(k) xq(k+1) xq(k+2) xq(k+3)] */
+ {
+ /* Load [xq(k) xq(k+1) xq(k+2) xq(k+3)] */
xqv = vld1q_f32(pXq);
- pXq+=4;
-
- /* Compute [xq(k)-x(i) xq(k+1)-x(i) xq(k+2)-x(i) xq(k+3)-x(i)] */
+ pXq+=4;
+
+ /* Compute [xq(k)-x(i) xq(k+1)-x(i) xq(k+2)-x(i) xq(k+3)-x(i)] */
diff = vsubq_f32(xqv, xiv);
- temp = diff;
-
- /* y(i) = a(i) + ... */
+ temp = diff;
+
+ /* y(i) = a(i) + ... */
yv = aiv;
- /* ... + b(i)*(x-x(i)) + ... */
- yv = vmlaq_f32(yv, biv, temp);
- /* ... + c(i)*(x-x(i))^2 + ... */
- temp = vmulq_f32(temp, diff);
- yv = vmlaq_f32(yv, civ, temp);
+ /* ... + b(i)*(x-x(i)) + ... */
+ yv = vmlaq_f32(yv, biv, temp);
+ /* ... + c(i)*(x-x(i))^2 + ... */
+ temp = vmulq_f32(temp, diff);
+ yv = vmlaq_f32(yv, civ, temp);
/* ... + d(i)*(x-x(i))^3 */
- temp = vmulq_f32(temp, diff);
- yv = vmlaq_f32(yv, div, temp);
-
+ temp = vmulq_f32(temp, diff);
+ yv = vmlaq_f32(yv, div, temp);
+
/* Store [y(k) y(k+1) y(k+2) y(k+3)] */
- vst1q_f32(pDst, yv);
- pDst+=4;
-
- blkCnt-=4;
- }
+ vst1q_f32(pDst, yv);
+ pDst+=4;
+
+ blkCnt-=4;
+ }
#endif
- while( *pXq <= x[i+1] && blkCnt > 0 )
- {
- x_sc = *pXq++;
+ while( *pXq <= x[i+1] && blkCnt > 0 )
+ {
+ x_sc = *pXq++;
*pDst = y[i]+bi*(x_sc-x[i])+c[i]*(x_sc-x[i])*(x_sc-x[i])+di*(x_sc-x[i])*(x_sc-x[i])*(x_sc-x[i]);
pDst++;
- blkCnt--;
- }
+ blkCnt--;
+ }
}
/* == Create output for remaining samples (x>=x(n)) == */
diff --git a/CMSIS/DSP/Source/SupportFunctions/arm_spline_interp_init_f32.c b/CMSIS/DSP/Source/SupportFunctions/arm_spline_interp_init_f32.c
index e014f20..d5065d9 100644
--- a/CMSIS/DSP/Source/SupportFunctions/arm_spline_interp_init_f32.c
+++ b/CMSIS/DSP/Source/SupportFunctions/arm_spline_interp_init_f32.c
@@ -47,15 +47,16 @@
void arm_spline_init_f32(
arm_spline_instance_f32 * S,
uint32_t n,
- arm_spline_type type)
+ arm_spline_type type,
+ float32_t * buffer)
{
- S->n_x = n;
- S->type = type;
-
+ S->n_x = n;
+ S->type = type;
/* Type (boundary conditions):
- * 0: Natural spline ( S1''(x1)=0 ; Sn''(xn)=0 )
- * 1: Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) )
+ * - Natural spline ( S1''(x1)=0 ; Sn''(xn)=0 )
+ * - Parabolic runout spline ( S1''(x1)=S2''(x2) ; Sn-1''(xn-1)=Sn''(xn) )
*/
+ S->buffer = buffer;
}
/**
diff --git a/CMSIS/DSP/Testing/Include/Tests/SupportTestsF32.h b/CMSIS/DSP/Testing/Include/Tests/SupportTestsF32.h
index 2830f02..0fe693c 100755
--- a/CMSIS/DSP/Testing/Include/Tests/SupportTestsF32.h
+++ b/CMSIS/DSP/Testing/Include/Tests/SupportTestsF32.h
@@ -12,9 +12,10 @@
Client::Pattern<float32_t> input;
Client::Pattern<float32_t> coefs;
- Client::Pattern<float32_t> inputX;
- Client::Pattern<float32_t> inputY;
- Client::Pattern<float32_t> outputX;
+ Client::Pattern<float32_t> inputX;
+ Client::Pattern<float32_t> inputY;
+ Client::Pattern<float32_t> outputX;
+ Client::Pattern<float32_t> buffer;
Client::LocalPattern<float32_t> output;
Client::LocalPattern<q15_t> outputQ15;
diff --git a/CMSIS/DSP/Testing/Source/Tests/SupportTestsF32.cpp b/CMSIS/DSP/Testing/Source/Tests/SupportTestsF32.cpp
index 5846837..8f9ea25 100755
--- a/CMSIS/DSP/Testing/Source/Tests/SupportTestsF32.cpp
+++ b/CMSIS/DSP/Testing/Source/Tests/SupportTestsF32.cpp
@@ -297,11 +297,12 @@
{
float32_t *inp = input.ptr();
float32_t *outp = output.ptr();
- arm_sort_instance_f32 S;
+ float32_t *buf = buffer.ptr();
+ buf = (float32_t *)malloc((this->nbSamples)*sizeof(float32_t) );
+ arm_merge_sort_instance_f32 S;
- arm_sort_init_f32(&S, ARM_SORT_MERGE, ARM_SORT_ASCENDING);
-
- arm_sort_f32(&S,inp,outp,this->nbSamples);
+ arm_merge_sort_init_f32(&S, ARM_SORT_ASCENDING, buf);
+ arm_merge_sort_f32(&S,inp,outp,this->nbSamples);
ASSERT_EMPTY_TAIL(output);
@@ -313,11 +314,12 @@
{
float32_t *inp = input.ptr();
float32_t *outp = output.ptr();
- arm_sort_instance_f32 S;
+ float32_t *buf = buffer.ptr();
+ buf = (float32_t *)malloc((this->nbSamples)*sizeof(float32_t) );
+ arm_merge_sort_instance_f32 S;
- arm_sort_init_f32(&S, ARM_SORT_MERGE, ARM_SORT_ASCENDING);
-
- arm_sort_f32(&S,inp,outp,this->nbSamples);
+ arm_merge_sort_init_f32(&S, ARM_SORT_ASCENDING, buf);
+ arm_merge_sort_f32(&S,inp,outp,this->nbSamples);
ASSERT_EMPTY_TAIL(output);
@@ -424,10 +426,11 @@
const float32_t *inpY = inputY.ptr();
const float32_t *outX = outputX.ptr();
float32_t *outp = output.ptr();
-
+ float32_t *buf = buffer.ptr();
+ buf=(float32_t*)malloc((3*4-1)*sizeof(float32_t));
arm_spline_instance_f32 S;
- arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT);
+ arm_spline_init_f32(&S, 4, ARM_SPLINE_PARABOLIC_RUNOUT, buf);
arm_spline_f32(&S, inpX, inpY, outX, outp, 20);
ASSERT_EMPTY_TAIL(output);
@@ -440,10 +443,11 @@
const float32_t *inpY = inputY.ptr();
const float32_t *outX = outputX.ptr();
float32_t *outp = output.ptr();
-
+ float32_t *buf = buffer.ptr();
+ buf=(float32_t*)malloc((3*9-1)*sizeof(float32_t));
arm_spline_instance_f32 S;
- arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL);
+ arm_spline_init_f32(&S, 9, ARM_SPLINE_NATURAL, buf);
arm_spline_f32(&S, inpX, inpY, outX, outp, 33);
ASSERT_EMPTY_TAIL(output);
@@ -456,10 +460,11 @@
const float32_t *inpY = inputY.ptr();
const float32_t *outX = outputX.ptr();
float32_t *outp = output.ptr();
-
+ float32_t *buf = buffer.ptr();
+ buf=(float32_t*)malloc((3*3-1)*sizeof(float32_t));
arm_spline_instance_f32 S;
- arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT);
+ arm_spline_init_f32(&S, 3, ARM_SPLINE_PARABOLIC_RUNOUT, buf);
arm_spline_f32(&S, inpX, inpY, outX, outp, 30);
ASSERT_EMPTY_TAIL(output);