From f51b3c33177b0c6d109327415ef1e521bae7437d Mon Sep 17 00:00:00 2001
From: Christophe Favergeon <Christophe.Favergeon@arm.com>
Date: Tue, 11 Aug 2020 13:05:34 +0200
Subject: [PATCH] CMSIS-DSP: Added f16 versions of the distance functions

---
 Include/arm_vec_math_f16.h                    |  39 +++
 Include/dsp/distance_functions_f16.h          | 132 ++++++++
 Source/DistanceFunctions/CMakeLists.txt       |  38 ++-
 .../DistanceFunctions/DistanceFunctionsF16.c  |  36 +++
 .../arm_braycurtis_distance_f16.c             | 141 ++++++++
 .../arm_canberra_distance_f16.c               | 159 +++++++++
 .../arm_chebyshev_distance_f16.c              | 135 ++++++++
 .../arm_cityblock_distance_f16.c              | 116 +++++++
 .../arm_correlation_distance_f16.c            |  88 +++++
 .../arm_cosine_distance_f16.c                 |  74 +++++
 .../arm_euclidean_distance_f16.c              | 118 +++++++
 .../arm_jensenshannon_distance_f16.c          | 164 ++++++++++
 .../arm_minkowski_distance_f16.c              | 127 ++++++++
 Testing/CMakeLists.txt                        |   1 +
 Testing/Include/Tests/DistanceTestsF16.h      |  30 ++
 Testing/Source/Tests/DistanceTestsF16.cpp     | 303 ++++++++++++++++++
 Testing/desc_f16.txt                          |  45 +++
 17 files changed, 1743 insertions(+), 3 deletions(-)
 create mode 100755 Source/DistanceFunctions/DistanceFunctionsF16.c
 create mode 100755 Source/DistanceFunctions/arm_braycurtis_distance_f16.c
 create mode 100755 Source/DistanceFunctions/arm_canberra_distance_f16.c
 create mode 100755 Source/DistanceFunctions/arm_chebyshev_distance_f16.c
 create mode 100755 Source/DistanceFunctions/arm_cityblock_distance_f16.c
 create mode 100755 Source/DistanceFunctions/arm_correlation_distance_f16.c
 create mode 100755 Source/DistanceFunctions/arm_cosine_distance_f16.c
 create mode 100755 Source/DistanceFunctions/arm_euclidean_distance_f16.c
 create mode 100755 Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
 create mode 100755 Source/DistanceFunctions/arm_minkowski_distance_f16.c
 create mode 100755 Testing/Include/Tests/DistanceTestsF16.h
 create mode 100755 Testing/Source/Tests/DistanceTestsF16.cpp

diff --git a/Include/arm_vec_math_f16.h b/Include/arm_vec_math_f16.h
index bd292c94..0c1f441a 100755
--- a/Include/arm_vec_math_f16.h
+++ b/Include/arm_vec_math_f16.h
@@ -40,6 +40,39 @@ extern "C"
 
 static const float16_t __logf_rng_f16=0.693147180f16;
 
+/* Lane-wise fast reciprocal approximation: linear first guess refined by 3 Newton steps. */
+__STATIC_INLINE f16x8_t vrecip_medprec_f16(
+    f16x8_t x)
+{
+    q15x8_t         m;
+    f16x8_t         b;
+    any16x8_t       xinv;
+    f16x8_t         ax = vabsq(x);
+    /* Work on |x|; the x == 0 lanes and the sign are patched at the end. */
+    xinv.f = ax;
+    /* m: offset taking the exponent bits (mask 0x7c00) to 0x3c00, the exponent of 1.0. */
+    m = 0x03c00 - (xinv.i & 0x07c00);
+    xinv.i = xinv.i + m;
+    xinv.f = 1.41176471f16 - 0.47058824f16 * xinv.f;
+    xinv.i = xinv.i + m;
+    /* Three identical refinements: xinv = xinv * (2 - xinv * |x|). */
+    b = 2.0f16 - xinv.f * ax;
+    xinv.f = xinv.f * b;
+
+    b = 2.0f16 - xinv.f * ax;
+    xinv.f = xinv.f * b;
+
+    b = 2.0f16 - xinv.f * ax;
+    xinv.f = xinv.f * b;
+    /* Lanes where x == 0 are overwritten with F16INFINITY. */
+    xinv.f = vdupq_m(xinv.f, F16INFINITY, vcmpeqq(x, 0.0f));
+    /*
+     * restore sign: negate the lanes where x < 0
+     */
+    xinv.f = vnegq_m(xinv.f, xinv.f, vcmpltq(x, 0.0f));
+
+    return xinv.f;
+}
 
 /* fast inverse approximation (4x newton) */
 __STATIC_INLINE f16x8_t vrecip_hiprec_f16(
@@ -212,6 +245,12 @@ __STATIC_INLINE float16x8_t arm_vec_exponent_f16(float16x8_t x, int16_t nb)
     return (r);
 }
 
+/* Lane-wise power val^n, evaluated as exp(n * log(val)). */
+__STATIC_INLINE f16x8_t vpowq_f16(f16x8_t val, f16x8_t n)
+{
+    f16x8_t nTimesLog = vmulq_f16(n, vlogq_f16(val));
+    return vexpq_f16(nTimesLog);
+}
 
 
 #endif /* (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)*/
diff --git a/Include/dsp/distance_functions_f16.h b/Include/dsp/distance_functions_f16.h
index 7c05a182..d115308d 100755
--- a/Include/dsp/distance_functions_f16.h
+++ b/Include/dsp/distance_functions_f16.h
@@ -26,12 +26,144 @@
 #ifndef _DISTANCE_FUNCTIONS_F16_H_
 #define _DISTANCE_FUNCTIONS_F16_H_
 
+#include "arm_math_types_f16.h"
+#include "arm_math_memory.h"
+
+#include "dsp/none.h"
+#include "dsp/utils.h"
+
+#include "dsp/statistics_functions_f16.h"
+#include "dsp/basic_math_functions_f16.h"
+#include "dsp/fast_math_functions_f16.h"
+
 #ifdef   __cplusplus
 extern "C"
 {
 #endif
 
 #if defined(ARM_FLOAT16_SUPPORTED)
+
+/**
+ * @brief        Euclidean distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Bray-Curtis distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Canberra distance between two vectors
+ *
+ * This function may divide by zero when samples pA[i] and pB[i] are both zero.
+ * The result of the computation will still be correct, so the division by zero
+ * may be ignored.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+
+/**
+ * @brief        Chebyshev distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+
+/**
+ * @brief        Cityblock (Manhattan) distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Correlation distance between two vectors
+ *
+ * The input vectors are modified in place!
+ *
+ * @param[in,out] pA        First vector
+ * @param[in,out] pB        Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float16_t arm_correlation_distance_f16(float16_t *pA,float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Cosine distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float16_t arm_cosine_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize);
+
+/**
+ * @brief        Jensen-Shannon distance between two vectors
+ *
+ * This function is assuming that elements of second vector are > 0
+ * and 0 only when the corresponding element of first vector is 0.
+ * Otherwise the result of the computation does not make sense
+ * and for speed reasons, the cases returning NaN or Infinity are not
+ * managed.
+ *
+ * When the function is computing x log (x / y) with x == 0 and y == 0,
+ * it will compute the right value (0) but a division by zero will occur
+ * and should be ignored in client code.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB,uint32_t blockSize);
+
+/**
+ * @brief        Minkowski distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    order      Norm order (>= 2)
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+
+
+float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize);
+
+
 #endif /*defined(ARM_FLOAT16_SUPPORTED)*/
 #ifdef   __cplusplus
 }
diff --git a/Source/DistanceFunctions/CMakeLists.txt b/Source/DistanceFunctions/CMakeLists.txt
index a965a91f..7f6c389b 100755
--- a/Source/DistanceFunctions/CMakeLists.txt
+++ b/Source/DistanceFunctions/CMakeLists.txt
@@ -7,7 +7,28 @@ include(configDsp)
 
 file(GLOB SRC "./*_*.c")
 
-add_library(CMSISDSPDistance STATIC ${SRC})
+# Sources are listed one by one instead of using the SRC glob.
+add_library(CMSISDSPDistance STATIC)
+target_sources(CMSISDSPDistance PRIVATE
+  arm_boolean_distance.c
+  arm_braycurtis_distance_f32.c
+  arm_canberra_distance_f32.c
+  arm_chebyshev_distance_f32.c
+  arm_cityblock_distance_f32.c
+  arm_correlation_distance_f32.c
+  arm_cosine_distance_f32.c
+  arm_dice_distance.c
+  arm_euclidean_distance_f32.c
+  arm_hamming_distance.c
+  arm_jaccard_distance.c
+  arm_jensenshannon_distance_f32.c
+  arm_kulsinski_distance.c
+  arm_minkowski_distance_f32.c
+  arm_rogerstanimoto_distance.c
+  arm_russellrao_distance.c
+  arm_sokalmichener_distance.c
+  arm_sokalsneath_distance.c
+  arm_yule_distance.c)
 
 configLib(CMSISDSPDistance ${ROOT})
 configDsp(CMSISDSPDistance ${ROOT})
@@ -16,5 +37,16 @@ configDsp(CMSISDSPDistance ${ROOT})
 target_include_directories(CMSISDSPDistance PUBLIC "${DSP}/Include")
 target_include_directories(CMSISDSPDistance PRIVATE ".")
 
-
-
+if (NOT (ARMAC5 OR DISABLEFLOAT16)) # f16 sources are added only when neither flag is set
+  target_sources(CMSISDSPDistance PRIVATE arm_braycurtis_distance_f16.c
+                                          arm_canberra_distance_f16.c
+                                          arm_chebyshev_distance_f16.c
+                                          arm_cityblock_distance_f16.c
+                                          arm_correlation_distance_f16.c
+                                          arm_cosine_distance_f16.c
+                                          arm_euclidean_distance_f16.c
+                                          arm_jensenshannon_distance_f16.c
+                                          arm_minkowski_distance_f16.c)
+endif()
+
+ 
\ No newline at end of file
diff --git a/Source/DistanceFunctions/DistanceFunctionsF16.c b/Source/DistanceFunctions/DistanceFunctionsF16.c
new file mode 100755
index 00000000..a0be2d44
--- /dev/null
+++ b/Source/DistanceFunctions/DistanceFunctionsF16.c
@@ -0,0 +1,36 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        DistanceFunctionsF16.c
+ * Description:  Combination of all distance function f16 source files.
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_braycurtis_distance_f16.c"
+#include "arm_canberra_distance_f16.c"
+#include "arm_chebyshev_distance_f16.c"
+#include "arm_cityblock_distance_f16.c"
+#include "arm_correlation_distance_f16.c"
+#include "arm_cosine_distance_f16.c"
+#include "arm_euclidean_distance_f16.c"
+#include "arm_jensenshannon_distance_f16.c"
+#include "arm_minkowski_distance_f16.c"
+
diff --git a/Source/DistanceFunctions/arm_braycurtis_distance_f16.c b/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
new file mode 100755
index 00000000..172dae9b
--- /dev/null
+++ b/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
@@ -0,0 +1,141 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_braycurtis_distance_f16.c
+ * Description:  Bray-Curtis distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+
+
+/**
+  @addtogroup FloatDist
+  @{
+ */
+
+
+/**
+ * @brief        Bray-Curtis distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /*
+     * Computes sum(|pA[i] - pB[i]|) / sum(|pA[i] + pB[i]|) over blockSize samples.
+     */
+
+    uint32_t        nbVec;
+    f16x8_t         vecA, vecB, vecTmp;
+    f16x8_t         sumAbsDiffV = vdupq_n_f16(0.0f);
+    f16x8_t         sumAbsSumV = vdupq_n_f16(0.0f);
+    float16_t       sumAbsDiff, sumAbsSum;
+
+    /* Main loop: one 8-lane vector of each input per iteration. */
+    for (nbVec = blockSize >> 3; nbVec > 0; nbVec--) {
+        vecA = vld1q(pA);
+        vecB = vld1q(pB);
+        pA += 8;
+        pB += 8;
+
+        vecTmp = vabdq(vecA, vecB);
+        sumAbsDiffV = vaddq(sumAbsDiffV, vecTmp);
+
+        vecTmp = vabsq_f16(vaddq_f16(vecA, vecB));
+        sumAbsSumV = vaddq(sumAbsSumV, vecTmp);
+    }
+
+    /* Tail: the (blockSize & 7) leftover samples, processed under a lane predicate. */
+    if ((blockSize & 7) > 0U) {
+        mve_pred16_t    tailPred = vctp16q(blockSize & 7);
+
+        vecA = vldrhq_z_f16(pA, tailPred);
+        vecB = vldrhq_z_f16(pB, tailPred);
+
+        vecTmp = vabdq(vecA, vecB);
+        sumAbsDiffV = vaddq_m(sumAbsDiffV, sumAbsDiffV, vecTmp, tailPred);
+
+        vecTmp = vabsq_f16(vaddq_f16(vecA, vecB));
+        sumAbsSumV = vaddq_m(sumAbsSumV, sumAbsSumV, vecTmp, tailPred);
+    }
+
+    /* Reduce each accumulator vector to a scalar. */
+    sumAbsDiff = vecAddAcrossF16Mve(sumAbsDiffV);
+    sumAbsSum = vecAddAcrossF16Mve(sumAbsSumV);
+
+    /*
+     * sumAbsSum is assumed to be non-zero. Being a sum of absolute values,
+     * it is zero only if every term is zero, which is very unlikely for
+     * long vectors.
+     */
+    return (sumAbsDiff / sumAbsSum);
+}
+#else
+
+float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+   /* Scalar version: sum(|a - b|) / sum(|a + b|) over blockSize samples. */
+   float16_t sumAbsDiff = 0.0f;
+   float16_t sumAbsSum = 0.0f;
+   uint32_t i;
+
+   for (i = 0; i < blockSize; i++)
+   {
+      const float16_t a = pA[i];
+      const float16_t b = pB[i];
+      sumAbsDiff += fabsf(a - b);
+      sumAbsSum += fabsf(a + b);
+   }
+   /*
+    * sumAbsSum is assumed to be non-zero: as a sum of absolute values it is
+    * zero only if every term is zero, which is very unlikely for long vectors.
+    */
+   return(sumAbsDiff / sumAbsSum);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+/**
+ * @} end of groupDistance group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
diff --git a/Source/DistanceFunctions/arm_canberra_distance_f16.c b/Source/DistanceFunctions/arm_canberra_distance_f16.c
new file mode 100755
index 00000000..186ea034
--- /dev/null
+++ b/Source/DistanceFunctions/arm_canberra_distance_f16.c
@@ -0,0 +1,159 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_canberra_distance_f16.c
+ * Description:  Canberra distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup FloatDist
+  @{
+ */
+
+
+/**
+ * @brief        Canberra distance between two vectors
+ *
+ * This function may divide by zero when samples pA[i] and pB[i] are both zero.
+ * The result of the computation will still be correct, so the division by zero
+ * may be ignored.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /*
+     * Computes sum(|pA[i] - pB[i]| / (|pA[i]| + |pB[i]|)) over blockSize
+     * samples. A term whose denominator is zero contributes 0.
+     */
+
+    float16_t       accum = 0.0f;
+    uint32_t         blkCnt;
+    f16x8_t         a, b, c, accumV;
+    mve_pred16_t    zeroDen;    /* lanes whose denominator |a| + |b| is zero */
+
+    accumV = vdupq_n_f16(0.0f);
+
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+
+        c = vabdq(a, b);
+
+        a = vabsq(a);
+        b = vabsq(b);
+        a = vaddq(a, b);
+
+        /*
+         * The denominator is zero in lanes where a and b are both zero.
+         * Flag those lanes before the reciprocal: its result for a zero
+         * input is presumably not zero (vrecip_medprec_f16 gives infinity).
+         */
+        zeroDen = vcmpeqq(a, 0.0f);
+        a = vrecip_hiprec_f16(a);
+
+        /* Force the term of a zero denominator to 0, as sklearn canberra does. */
+        a = vdupq_m_n_f16(a, 0.0f, zeroDen);
+        c = vmulq(c, a);
+        accumV = vaddq(accumV, c);
+
+        pA += 8;
+        pB += 8;
+        blkCnt--;
+    }
+
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U) {
+        mve_pred16_t    p0 = vctp16q(blkCnt);
+
+        a = vldrhq_z_f16(pA, p0);
+        b = vldrhq_z_f16(pB, p0);
+
+        c = vabdq(a, b);
+
+        a = vabsq(a);
+        b = vabsq(b);
+        a = vaddq(a, b);
+
+        /* Same zero-denominator handling as in the main loop. */
+        zeroDen = vcmpeqq(a, 0.0f);
+        a = vrecip_hiprec_f16(a);
+        a = vdupq_m_n_f16(a, 0.0f, zeroDen);
+        c = vmulq(c, a);
+        accumV = vaddq_m(accumV, accumV, c, p0);
+    }
+
+    accum = vecAddAcrossF16Mve(accumV);
+
+    return (accum);
+}
+
+
+#else
+float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+   /* Scalar version: sum of |a - b| / (|a| + |b|), skipping the 0/0 terms. */
+   float16_t total = 0.0f;
+   uint32_t i;
+   for (i = 0; i < blockSize; i++)
+   {
+      const float16_t a = pA[i];
+      const float16_t b = pB[i];
+      const float16_t absDiff = fabsf(a - b);
+      const float16_t absSum = fabsf(a) + fabsf(b);
+      /* When both samples are zero nothing is added (the term counts as 0). */
+      if (!((a == 0.0f) && (b == 0.0f)))
+      {
+         total += (absDiff / absSum);
+      }
+   }
+   return(total);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
diff --git a/Source/DistanceFunctions/arm_chebyshev_distance_f16.c b/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
new file mode 100755
index 00000000..f6ab96c0
--- /dev/null
+++ b/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
@@ -0,0 +1,135 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_chebyshev_distance_f16.c
+ * Description:  Chebyshev distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup FloatDist
+  @{
+ */
+
+
+/**
+ * @brief        Chebyshev distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /*
+     * Computes max(|pA[i] - pB[i]|) over blockSize samples.
+     */
+    uint32_t        nbVec;                      /* full 8-lane vectors still to process */
+    f16x8_t         lhs, rhs;
+    f16x8_t         laneMax = vdupq_n_f16(0.0); /* running maximum, one per lane */
+    float16_t       seed = 0.0;                 /* starting value of the final reduction */
+
+    for (nbVec = blockSize >> 3; nbVec > 0U; nbVec--) {
+        lhs = vld1q(pA);
+        rhs = vld1q(pB);
+        pA += 8;
+        pB += 8;
+
+        /*
+         * vmaxnmaq compares absolute values, so the difference needs no
+         * separate abs.
+         */
+        laneMax = vmaxnmaq(vsubq(lhs, rhs), laneMax);
+    }
+
+    /*
+     * Tail: the (blockSize & 7) leftover samples, processed under a lane
+     * predicate.
+     */
+    if ((blockSize & 7) > 0U) {
+        mve_pred16_t    tailPred = vctp16q(blockSize & 7);
+
+        lhs = vldrhq_z_f16(pA, tailPred);
+        rhs = vldrhq_z_f16(pB, tailPred);
+
+        /*
+         * Predicated update of the per-lane maximum with the tail
+         * differences.
+         */
+        laneMax = vmaxnmaq_m(laneMax, vsubq(lhs, rhs), tailPred);
+    }
+
+    /*
+     * Reduce to the largest value across all lanes.
+     */
+    return vmaxnmavq(seed, laneMax);
+}
+
+#else
+float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+   /*
+    * Computes max(|pA[i] - pB[i]|) over blockSize samples.
+    * An empty input (blockSize == 0) reads nothing and yields 0.
+    */
+   float16_t diff, maxVal = 0.0f, tmpA, tmpB;
+   uint32_t i;
+
+   for (i = 0; i < blockSize; i++)
+   {
+      tmpA = pA[i];
+      tmpB = pB[i];
+      diff = fabsf(tmpA - tmpB);
+
+      /* The first sample always seeds maxVal; later ones must exceed it. */
+      if ((i == 0U) || (diff > maxVal))
+      {
+         maxVal = diff;
+      }
+   }
+
+   return(maxVal);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
diff --git a/Source/DistanceFunctions/arm_cityblock_distance_f16.c b/Source/DistanceFunctions/arm_cityblock_distance_f16.c
new file mode 100755
index 00000000..e9810b7f
--- /dev/null
+++ b/Source/DistanceFunctions/arm_cityblock_distance_f16.c
@@ -0,0 +1,116 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cityblock_distance_f16.c
+ * Description:  Cityblock (Manhattan) distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+  @addtogroup FloatDist
+  @{
+ */
+
+
+/**
+ * @brief        Cityblock (Manhattan) distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /*
+     * Computes sum(|pA[i] - pB[i]|) over blockSize samples
+     * (Cityblock / Manhattan distance).
+     */
+
+    uint32_t        nbVec;
+    f16x8_t         vecA, vecB;
+    f16x8_t         sumV = vdupq_n_f16(0.0f);
+
+    /* Main loop: one 8-lane vector of each input per iteration. */
+    for (nbVec = blockSize >> 3; nbVec > 0U; nbVec--) {
+        vecA = vld1q(pA);
+        vecB = vld1q(pB);
+        pA += 8;
+        pB += 8;
+
+        sumV = vaddq(sumV, vabdq(vecA, vecB));
+    }
+
+    /*
+     * Tail: the (blockSize & 7) leftover samples, processed under a lane
+     * predicate.
+     */
+    if ((blockSize & 7) > 0U) {
+        mve_pred16_t    tailPred = vctp16q(blockSize & 7);
+
+        vecA = vldrhq_z_f16(pA, tailPred);
+        vecB = vldrhq_z_f16(pB, tailPred);
+
+        sumV = vaddq_m(sumV, sumV, vabdq(vecA, vecB), tailPred);
+    }
+
+    /* Reduce the accumulator vector to a scalar. */
+    return vecAddAcrossF16Mve(sumV);
+}
+
+#else
+float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+   /* Scalar version: sum(|pA[i] - pB[i]|) over blockSize samples. */
+   float16_t total = 0.0f;
+   uint32_t i;
+
+   for (i = 0; i < blockSize; i++)
+   {
+      const float16_t a = pA[i];
+      const float16_t b = pB[i];
+
+      total += fabsf(a - b);
+   }
+
+   return(total);
+}
+#endif
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
diff --git a/Source/DistanceFunctions/arm_correlation_distance_f16.c b/Source/DistanceFunctions/arm_correlation_distance_f16.c
new file mode 100755
index 00000000..e3b3a788
--- /dev/null
+++ b/Source/DistanceFunctions/arm_correlation_distance_f16.c
@@ -0,0 +1,88 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_correlation_distance_f16.c
+ * Description:  Correlation distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+
+/**
+  @addtogroup FloatDist
+  @{
+ */
+
+
+/**
+ * @brief        Correlation distance between two vectors
+ *
+ * The input vectors are modified in place!
+ *
+ * @param[in,out] pA        First vector
+ * @param[in,out] pB        Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float16_t arm_correlation_distance_f16(float16_t *pA,float16_t *pB, uint32_t blockSize)
+{
+    float16_t meanA, meanB, powerA, powerB, dotAB, norm;
+
+    /* Subtract each vector's mean in place: pA and pB are overwritten. */
+    arm_mean_f16(pA, blockSize, &meanA);
+    arm_mean_f16(pB, blockSize, &meanB);
+    arm_offset_f16(pA, -meanA, pA, blockSize);
+    arm_offset_f16(pB, -meanB, pB, blockSize);
+
+    /* arm_power_f16 and arm_dot_prod_f16 results for the shifted vectors. */
+    arm_power_f16(pA, blockSize, &powerA);
+    arm_power_f16(pB, blockSize, &powerB);
+    arm_dot_prod_f16(pA, pB, blockSize, &dotAB);
+
+    /* Divide all three quantities by the vector length. */
+    dotAB /= blockSize;
+    powerA /= blockSize;
+    powerB /= blockSize;
+
+    /* norm receives the arm_sqrt_f16 result for powerA * powerB. */
+    arm_sqrt_f16(powerA * powerB, &norm);
+
+    return(1.0f - dotAB / norm);
+}
+
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
diff --git a/Source/DistanceFunctions/arm_cosine_distance_f16.c b/Source/DistanceFunctions/arm_cosine_distance_f16.c
new file mode 100755
index 00000000..207fa970
--- /dev/null
+++ b/Source/DistanceFunctions/arm_cosine_distance_f16.c
@@ -0,0 +1,74 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cosine_distance_f16.c
+ * Description:  Cosine distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup FloatDist
+  @{
+ */
+
+
+
+/**
+ * @brief        Cosine distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float16_t arm_cosine_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    float16_t powerA, powerB, dotAB, norm;
+
+    /* Returns 1 - dotAB / norm, with norm taken from arm_sqrt_f16(powerA * powerB). */
+    arm_dot_prod_f16(pA, pB, blockSize, &dotAB);
+    arm_power_f16(pA, blockSize, &powerA);
+    arm_power_f16(pB, blockSize, &powerB);
+
+    arm_sqrt_f16(powerA * powerB, &norm);
+
+    return(1.0f - dotAB / norm);
+}
+
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
diff --git a/Source/DistanceFunctions/arm_euclidean_distance_f16.c b/Source/DistanceFunctions/arm_euclidean_distance_f16.c
new file mode 100755
index 00000000..97f41aa9
--- /dev/null
+++ b/Source/DistanceFunctions/arm_euclidean_distance_f16.c
@@ -0,0 +1,118 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_euclidean_distance_f16.c
+ * Description:  Euclidean distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+
+/**
+  @addtogroup FloatDist
+  @{
+ */
+
+
+/**
+ * @brief        Euclidean distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /* Per-lane running sum of squared differences */
+    f16x8_t         sumSqV = vdupq_n_f16(0.0f);
+    f16x8_t         vecA, vecB, diffV;
+    float16_t       dist;
+    uint32_t        nbFull, nbTail;
+
+    /* Full vectors: 8 half-precision samples per iteration */
+    for (nbFull = blockSize >> 3; nbFull > 0U; nbFull--) {
+        vecA = vld1q(pA);
+        vecB = vld1q(pB);
+
+        diffV = vsubq(vecA, vecB);
+        sumSqV = vfmaq(sumSqV, diffV, diffV);
+
+        pA += 8;
+        pB += 8;
+    }
+
+    /*
+     * Remaining 1 to 7 samples, handled with a predicate so that
+     * only the valid lanes are loaded and accumulated.
+     */
+    nbTail = blockSize & 7;
+    if (nbTail > 0U) {
+        mve_pred16_t    tailPred = vctp16q(nbTail);
+
+        vecA = vldrhq_z_f16(pA, tailPred);
+        vecB = vldrhq_z_f16(pB, tailPred);
+
+        diffV = vsubq(vecA, vecB);
+        sumSqV = vfmaq_m(sumSqV, diffV, diffV, tailPred);
+    }
+
+    /* Reduce the lanes to a scalar, then take the square root */
+    arm_sqrt_f16(vecAddAcrossF16Mve(sumSqV), &dist);
+    return (dist);
+}
+
+#else
+float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+   float16_t sumSq = 0.0f, delta, dist;
+   uint32_t  k;
+   /* Accumulate the squared element-wise differences */
+   for(k = 0; k < blockSize; k++)
+   {
+      delta = pA[k] - pB[k];
+      sumSq += SQ(delta);
+   }
+   arm_sqrt_f16(sumSq,&dist);
+   return(dist);
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
diff --git a/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c b/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
new file mode 100755
index 00000000..83a499c7
--- /dev/null
+++ b/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
@@ -0,0 +1,164 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_jensenshannon_distance_f16.c
+ * Description:  Jensen-Shannon distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup FloatDist
+  @{
+ */
+
+#if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE)
+/// @private Term x * log(x / y) summed by the scalar Jensen-Shannon distance (log taken with logf)
+__STATIC_INLINE float16_t rel_entr(float16_t x, float16_t y)
+{
+    return (x * logf(x / y));
+}
+#endif
+
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /* Per-lane sum of a*log(a/m) + b*log(b/m), with m = (a + b) / 2 */
+    f16x8_t         accV = vdupq_n_f16(0.0f);
+    f16x8_t         vecA, vecB, meanV, termV;
+    float16_t       dist;
+    uint32_t        nbFull, nbTail;
+
+    /* Full vectors: 8 half-precision samples per iteration */
+    for (nbFull = blockSize >> 3; nbFull > 0U; nbFull--) {
+        vecA = vld1q(pA);
+        vecB = vld1q(pB);
+
+        meanV = vaddq(vecA, vecB);
+        meanV = vmulq(meanV, 0.5f);
+
+        /* accV += a * log(a / m), division done through a reciprocal */
+        termV = vmulq(vecA, vrecip_medprec_f16(meanV));
+        termV = vlogq_f16(termV);
+        accV = vfmaq(accV, vecA, termV);
+
+        /* accV += b * log(b / m) */
+        termV = vmulq_f16(vecB, vrecip_medprec_f16(meanV));
+        termV = vlogq_f16(termV);
+        accV = vfmaq(accV, vecB, termV);
+
+        pA += 8;
+        pB += 8;
+    }
+
+    /*
+     * Remaining 1 to 7 samples: predicated loads and accumulations
+     * restrict the update to the valid lanes.
+     */
+    nbTail = blockSize & 7;
+    if (nbTail > 0U) {
+        mve_pred16_t    tailPred = vctp16q(nbTail);
+
+        vecA = vldrhq_z_f16(pA, tailPred);
+        vecB = vldrhq_z_f16(pB, tailPred);
+
+        meanV = vaddq(vecA, vecB);
+        meanV = vmulq(meanV, 0.5f);
+
+        termV = vmulq_f16(vecA, vrecip_medprec_f16(meanV));
+        termV = vlogq_f16(termV);
+        accV = vfmaq_m_f16(accV, vecA, termV, tailPred);
+
+        termV = vmulq_f16(vecB, vrecip_medprec_f16(meanV));
+        termV = vlogq_f16(termV);
+        accV = vfmaq_m_f16(accV, vecB, termV, tailPred);
+    }
+
+    arm_sqrt_f16(vecAddAcrossF16Mve(accV) / 2.0f, &dist);
+    return (dist);
+}
+
+#else
+
+
+/**
+ * @brief        Jensen-Shannon distance between two vectors
+ *
+ * This function assumes that the elements of the second vector are > 0,
+ * or 0 only when the corresponding element of the first vector is 0.
+ * Otherwise the result of the computation does not make sense
+ * and, for speed reasons, the cases returning NaN or Infinity are not
+ * handled.
+ *
+ * When the function computes x log (x / y) with x == 0 and y == 0,
+ * it will compute the right result (0) but a division by zero will occur
+ * and should be ignored in client code.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+
+float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /* Separate accumulators for the pA and pB rel_entr terms */
+    float16_t entrA = 0.0f;
+    float16_t entrB = 0.0f;
+    float16_t mean, total, dist;
+
+    /* Each pair contributes rel_entr(a, m) and rel_entr(b, m), m being their midpoint */
+    for(; blockSize > 0U; blockSize--, pA++, pB++)
+    {
+      mean = (*pA + *pB) / 2.0f;
+      entrA += rel_entr(*pA, mean);
+      entrB += rel_entr(*pB, mean);
+    }
+
+    /* Square root of half the combined sum */
+    total = entrA + entrB;
+    arm_sqrt_f16(total/2.0f, &dist);
+    return(dist);
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
diff --git a/Source/DistanceFunctions/arm_minkowski_distance_f16.c b/Source/DistanceFunctions/arm_minkowski_distance_f16.c
new file mode 100755
index 00000000..35cf9f6c
--- /dev/null
+++ b/Source/DistanceFunctions/arm_minkowski_distance_f16.c
@@ -0,0 +1,127 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_minkowski_distance_f16.c
+ * Description:  Minkowski distance between two vectors
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup FloatDist
+  @{
+ */
+
+
+/**
+ * @brief        Minkowski distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    order      Distance order
+ * @param[in]    blockSize  Number of samples
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize)
+{
+    uint32_t        blkCnt;
+    f16x8_t         a, b, tmpV, sumV;
+    /* Exponent broadcast to all lanes once instead of once per iteration */
+    const f16x8_t   orderV = vdupq_n_f16(order);
+
+    sumV = vdupq_n_f16(0.0f);
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+        /* sumV += |a - b| ^ order, lane by lane */
+        tmpV = vabdq(a, b);
+        tmpV = vpowq_f16(tmpV, orderV);
+        sumV = vaddq(sumV, tmpV);
+
+        pA += 8;
+        pB += 8;
+        blkCnt--;
+    }
+
+    /*
+     * tail: remaining 1 to 7 samples
+     * (predicated add: only the valid lanes update sumV)
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U) {
+        mve_pred16_t    p0 = vctp16q(blkCnt);
+
+        a = vldrhq_z_f16(pA, p0);
+        b = vldrhq_z_f16(pB, p0);
+
+        tmpV = vabdq(a, b);
+        tmpV = vpowq_f16(tmpV, orderV);
+        sumV = vaddq_m(sumV, sumV, tmpV, p0);
+    }
+    /* Lane reduction, then root of degree order */
+    return (powf(vecAddAcrossF16Mve(sumV), (1.0f / (float16_t) order)));
+}
+
+
+#else
+
+
+float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize)
+{
+    /* Running sum of |a - b| raised to the power order */
+    float16_t acc = 0.0f;
+
+    for(; blockSize > 0U; blockSize--)
+    {
+       acc += powf(fabsf(*pA - *pB),order);
+       pA++;
+       pB++;
+    }
+
+    /* Root of degree order of the accumulated sum */
+    return(powf(acc,(1.0f/order)));
+}
+
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of FloatDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
diff --git a/Testing/CMakeLists.txt b/Testing/CMakeLists.txt
index e6f46703..c9378aea 100644
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@@ -340,6 +340,7 @@ set(TESTSRC16
   Source/Tests/SupportTestsF16.cpp
   Source/Tests/SupportBarTestsF16.cpp
   Source/Tests/FastMathF16.cpp
+  Source/Tests/DistanceTestsF16.cpp
   )
 endif()
 endif() 
diff --git a/Testing/Include/Tests/DistanceTestsF16.h b/Testing/Include/Tests/DistanceTestsF16.h
new file mode 100755
index 00000000..7ba9dedc
--- /dev/null
+++ b/Testing/Include/Tests/DistanceTestsF16.h
@@ -0,0 +1,30 @@
+#include "Test.h"
+#include "Pattern.h"
+
+#include "dsp/distance_functions_f16.h"
+
+class DistanceTestsF16:public Client::Suite
+    {
+        public:
+            DistanceTestsF16(Testing::testID_t id);
+            virtual void setUp(Testing::testID_t,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr);
+            virtual void tearDown(Testing::testID_t,Client::PatternMgr *mgr);
+        private:
+            #include "DistanceTestsF16_decl.h"
+            // Input vectors and the dimension table, (re)loaded in setUp
+            Client::Pattern<float16_t> inputA;
+            Client::Pattern<float16_t> inputB;
+            Client::Pattern<int16_t> dims;
+            // Computed distances, plus scratch copies of the inputs (correlation test)
+            Client::LocalPattern<float16_t> output;
+            Client::LocalPattern<float16_t> tmpA;
+            Client::LocalPattern<float16_t> tmpB;
+
+            // Reference patterns are not loaded when we are in dump mode
+            Client::RefPattern<float16_t> ref;
+            // Vector length and number of vector pairs, both read from dims in setUp
+            int vecDim;
+            int nbPatterns;
+
+
+    };
diff --git a/Testing/Source/Tests/DistanceTestsF16.cpp b/Testing/Source/Tests/DistanceTestsF16.cpp
new file mode 100755
index 00000000..12ec71e0
--- /dev/null
+++ b/Testing/Source/Tests/DistanceTestsF16.cpp
@@ -0,0 +1,303 @@
+#include "DistanceTestsF16.h"
+#include <stdio.h>
+#include "Error.h"
+#include "Test.h"
+
+#define REL_ERROR (2e-3)
+
+#define REL_JS_ERROR (3e-2)
+
+#define REL_MK_ERROR (1e-2)
+
+
+    void DistanceTestsF16::test_braycurtis_distance_f16()
+    {
+       // One distance per (A,B) vector pair; pairs are stored back to back
+       const float16_t *vecA = inputA.ptr();
+       const float16_t *vecB = inputB.ptr();
+       float16_t *result = output.ptr();
+       const int dim = this->vecDim;
+
+       for(int pat=0; pat < this->nbPatterns ; pat++)
+       {
+          result[pat] = arm_braycurtis_distance_f16(vecA, vecB, dim);
+
+          vecA += dim;
+          vecB += dim;
+       }
+
+        ASSERT_REL_ERROR(output,ref,REL_ERROR);
+    } 
+ 
+    void DistanceTestsF16::test_canberra_distance_f16()
+    {
+       // One distance per (A,B) vector pair; pairs are stored back to back
+       const float16_t *vecA = inputA.ptr();
+       const float16_t *vecB = inputB.ptr();
+       float16_t *result = output.ptr();
+       const int dim = this->vecDim;
+
+       for(int pat=0; pat < this->nbPatterns ; pat++)
+       {
+          result[pat] = arm_canberra_distance_f16(vecA, vecB, dim);
+
+          vecA += dim;
+          vecB += dim;
+       }
+
+        ASSERT_REL_ERROR(output,ref,REL_ERROR);
+    } 
+
+    void DistanceTestsF16::test_chebyshev_distance_f16()
+    {
+       // One distance per (A,B) vector pair; pairs are stored back to back
+       const float16_t *vecA = inputA.ptr();
+       const float16_t *vecB = inputB.ptr();
+       float16_t *result = output.ptr();
+       const int dim = this->vecDim;
+
+       for(int pat=0; pat < this->nbPatterns ; pat++)
+       {
+          result[pat] = arm_chebyshev_distance_f16(vecA, vecB, dim);
+
+          vecA += dim;
+          vecB += dim;
+       }
+
+        ASSERT_REL_ERROR(output,ref,REL_ERROR);
+    } 
+
+    void DistanceTestsF16::test_cityblock_distance_f16()
+    {
+       // One distance per (A,B) vector pair; pairs are stored back to back
+       const float16_t *vecA = inputA.ptr();
+       const float16_t *vecB = inputB.ptr();
+       float16_t *result = output.ptr();
+       const int dim = this->vecDim;
+
+       for(int pat=0; pat < this->nbPatterns ; pat++)
+       {
+          result[pat] = arm_cityblock_distance_f16(vecA, vecB, dim);
+
+          vecA += dim;
+          vecB += dim;
+       }
+
+        ASSERT_REL_ERROR(output,ref,REL_ERROR);
+    } 
+
+    void DistanceTestsF16::test_correlation_distance_f16()
+    {
+       const float16_t *vecA = inputA.ptr();
+       const float16_t *vecB = inputB.ptr();
+       float16_t *scratchA = tmpA.ptr();
+       float16_t *scratchB = tmpB.ptr();
+       float16_t *result = output.ptr();
+       const int dim = this->vecDim;
+       const size_t nbBytes = sizeof(float16_t) * dim;
+
+       for(int pat=0; pat < this->nbPatterns ; pat++)
+       {
+          // Work on scratch copies (presumably the function may overwrite its arguments - confirm)
+          memcpy(scratchA, vecA, nbBytes);
+          memcpy(scratchB, vecB, nbBytes);
+          result[pat] = arm_correlation_distance_f16(scratchA, scratchB, dim);
+
+          vecA += dim;
+          vecB += dim;
+       }
+
+       // Compare with the reference pattern within REL_ERROR
+        ASSERT_REL_ERROR(output,ref,REL_ERROR);
+    } 
+
+    void DistanceTestsF16::test_cosine_distance_f16()
+    {
+       // One distance per (A,B) vector pair; pairs are stored back to back
+       const float16_t *vecA = inputA.ptr();
+       const float16_t *vecB = inputB.ptr();
+       float16_t *result = output.ptr();
+       const int dim = this->vecDim;
+
+       for(int pat=0; pat < this->nbPatterns ; pat++)
+       {
+          result[pat] = arm_cosine_distance_f16(vecA, vecB, dim);
+
+          vecA += dim;
+          vecB += dim;
+       }
+
+        ASSERT_REL_ERROR(output,ref,REL_ERROR);
+    } 
+
+    void DistanceTestsF16::test_euclidean_distance_f16()
+    {
+       // One distance per (A,B) vector pair; pairs are stored back to back
+       const float16_t *vecA = inputA.ptr();
+       const float16_t *vecB = inputB.ptr();
+       float16_t *result = output.ptr();
+       const int dim = this->vecDim;
+
+       for(int pat=0; pat < this->nbPatterns ; pat++)
+       {
+          result[pat] = arm_euclidean_distance_f16(vecA, vecB, dim);
+
+          vecA += dim;
+          vecB += dim;
+       }
+
+        ASSERT_REL_ERROR(output,ref,REL_ERROR);
+    } 
+
+    void DistanceTestsF16::test_jensenshannon_distance_f16()
+    {
+       // One distance per (A,B) vector pair; pairs are stored back to back
+       const float16_t *vecA = inputA.ptr();
+       const float16_t *vecB = inputB.ptr();
+       float16_t *result = output.ptr();
+       const int dim = this->vecDim;
+
+       // Walk both inputs one vector at a time
+       for(int pat=0; pat < this->nbPatterns ; pat++)
+       {
+          result[pat] = arm_jensenshannon_distance_f16(vecA, vecB, dim);
+
+          vecA += dim;
+          vecB += dim;
+       }
+
+       // Checked with REL_JS_ERROR, a looser bound than REL_ERROR
+        ASSERT_REL_ERROR(output,ref,REL_JS_ERROR);
+    } 
+
+    void DistanceTestsF16::test_minkowski_distance_f16()
+    {
+       const float16_t *vecA = inputA.ptr();
+       const float16_t *vecB = inputB.ptr();
+       // dims[0] and dims[1] are nbPatterns and vecDim (read in setUp); one order per pattern follows
+       const int16_t   *orders = dims.ptr() + 2;
+       float16_t *result = output.ptr();
+       const int dim = this->vecDim;
+
+       for(int pat=0; pat < this->nbPatterns ; pat++)
+       {
+          result[pat] = arm_minkowski_distance_f16(vecA, vecB, orders[pat], dim);
+
+          vecA += dim;
+          vecB += dim;
+       }
+
+       // Checked with REL_MK_ERROR, a looser bound than the
+       // REL_ERROR used by most of the other distance tests
+        ASSERT_REL_ERROR(output,ref,REL_MK_ERROR);
+    } 
+  
+  
+    void DistanceTestsF16::setUp(Testing::testID_t id,std::vector<Testing::param_t>& paramsArgs,Client::PatternMgr *mgr)
+    {
+        // Loads the inputs, dimensions and reference for test `id` and creates the output buffer
+        (void)paramsArgs; // not used by this suite
+        if ((id != DistanceTestsF16::TEST_MINKOWSKI_DISTANCE_F16_9) && (id != DistanceTestsF16::TEST_JENSENSHANNON_DISTANCE_F16_8))
+        { // Inputs and dims shared by every test except these two
+            inputA.reload(DistanceTestsF16::INPUTA_F16_ID,mgr);
+            inputB.reload(DistanceTestsF16::INPUTB_F16_ID,mgr);
+            dims.reload(DistanceTestsF16::DIMS_S16_ID,mgr);
+            // dims[0] = number of vector pairs, dims[1] = vector length
+            const int16_t   *dimsp = dims.ptr();
+            
+            this->nbPatterns=dimsp[0];
+            this->vecDim=dimsp[1];
+            output.create(this->nbPatterns,DistanceTestsF16::OUT_F16_ID,mgr);
+        }
+        // Reference pattern for the selected test (plus the inputs of the two special cases)
+        switch(id)
+        {
+            case DistanceTestsF16::TEST_BRAYCURTIS_DISTANCE_F16_1:
+            {
+              ref.reload(DistanceTestsF16::REF1_F16_ID,mgr);
+            }
+            break;
+
+            case DistanceTestsF16::TEST_CANBERRA_DISTANCE_F16_2:
+            {
+              ref.reload(DistanceTestsF16::REF2_F16_ID,mgr);
+            }
+            break;
+
+            case DistanceTestsF16::TEST_CHEBYSHEV_DISTANCE_F16_3:
+            {
+              ref.reload(DistanceTestsF16::REF3_F16_ID,mgr);
+            }
+            break;
+
+            case DistanceTestsF16::TEST_CITYBLOCK_DISTANCE_F16_4:
+            {
+              ref.reload(DistanceTestsF16::REF4_F16_ID,mgr);
+            }
+            break;
+
+            case DistanceTestsF16::TEST_CORRELATION_DISTANCE_F16_5:
+            {
+              ref.reload(DistanceTestsF16::REF5_F16_ID,mgr);
+              tmpA.create(this->vecDim,DistanceTestsF16::TMPA_F16_ID,mgr); // scratch, one vector long
+              tmpB.create(this->vecDim,DistanceTestsF16::TMPB_F16_ID,mgr); // scratch, one vector long
+            }
+            break;
+
+            case DistanceTestsF16::TEST_COSINE_DISTANCE_F16_6:
+            {
+              ref.reload(DistanceTestsF16::REF6_F16_ID,mgr);
+            }
+            break;
+
+            case DistanceTestsF16::TEST_EUCLIDEAN_DISTANCE_F16_7:
+            {
+              ref.reload(DistanceTestsF16::REF7_F16_ID,mgr);
+            }
+            break;
+
+            case DistanceTestsF16::TEST_JENSENSHANNON_DISTANCE_F16_8: // dedicated inputs, shared dims
+            {
+              inputA.reload(DistanceTestsF16::INPUTA_JEN_F16_ID,mgr);
+              inputB.reload(DistanceTestsF16::INPUTB_JEN_F16_ID,mgr);
+              dims.reload(DistanceTestsF16::DIMS_S16_ID,mgr);
+              
+              const int16_t   *dimsp = dims.ptr();
+              
+              this->nbPatterns=dimsp[0];
+              this->vecDim=dimsp[1];
+              output.create(this->nbPatterns,DistanceTestsF16::OUT_F16_ID,mgr);
+
+              ref.reload(DistanceTestsF16::REF8_F16_ID,mgr);
+            }
+            break;
+
+            case DistanceTestsF16::TEST_MINKOWSKI_DISTANCE_F16_9: // shared inputs, dedicated dims
+            {
+              inputA.reload(DistanceTestsF16::INPUTA_F16_ID,mgr);
+              inputB.reload(DistanceTestsF16::INPUTB_F16_ID,mgr);
+              dims.reload(DistanceTestsF16::DIMS_MINKOWSKI_S16_ID,mgr);
+              // Same two leading entries; the test reads the orders from index 2 onwards
+              const int16_t   *dimsp = dims.ptr();
+              
+              this->nbPatterns=dimsp[0];
+              this->vecDim=dimsp[1];
+              output.create(this->nbPatterns,DistanceTestsF16::OUT_F16_ID,mgr);
+
+              ref.reload(DistanceTestsF16::REF9_F16_ID,mgr);
+            }
+            break;
+
+        }
+
+       
+
+       
+
+    }
+
+    void DistanceTestsF16::tearDown(Testing::testID_t id,Client::PatternMgr *mgr)
+    {
+       (void)id; // the test id is not needed here
+       output.dump(mgr); // dump the computed distances through the pattern manager
+    }
diff --git a/Testing/desc_f16.txt b/Testing/desc_f16.txt
index 21635bf4..7ed73db5 100755
--- a/Testing/desc_f16.txt
+++ b/Testing/desc_f16.txt
@@ -382,6 +382,51 @@ group Root {
           }
         }
 
+        group Distance Tests {
+           class = DistanceTests
+           folder = Distance
+
+           suite Distance Tests F16 {
+              class = DistanceTestsF16
+              folder = DistanceF16
+
+              Pattern DIMS_S16_ID : Dims1_s16.txt 
+              Pattern DIMS_MINKOWSKI_S16_ID : Dims9_s16.txt 
+
+              Pattern INPUTA_F16_ID : InputA1_f16.txt
+              Pattern INPUTB_F16_ID : InputB1_f16.txt
+
+              Pattern INPUTA_JEN_F16_ID : InputA8_f16.txt
+              Pattern INPUTB_JEN_F16_ID : InputB8_f16.txt
+
+              Pattern REF1_F16_ID : Ref1_f16.txt
+              Pattern REF2_F16_ID : Ref2_f16.txt
+              Pattern REF3_F16_ID : Ref3_f16.txt
+              Pattern REF4_F16_ID : Ref4_f16.txt
+              Pattern REF5_F16_ID : Ref5_f16.txt
+              Pattern REF6_F16_ID : Ref6_f16.txt
+              Pattern REF7_F16_ID : Ref7_f16.txt
+              Pattern REF8_F16_ID : Ref8_f16.txt
+              Pattern REF9_F16_ID : Ref9_f16.txt
+
+              Output  OUT_F16_ID : Output
+              Output  TMPA_F16_ID : TmpA
+              Output  TMPB_F16_ID : TmpB
+
+              Functions {
+                 arm_braycurtis_distance_f16:test_braycurtis_distance_f16
+                 arm_canberra_distance_f16:test_canberra_distance_f16
+                 arm_chebyshev_distance_f16:test_chebyshev_distance_f16
+                 arm_cityblock_distance_f16:test_cityblock_distance_f16
+                 arm_correlation_distance_f16:test_correlation_distance_f16
+                 arm_cosine_distance_f16:test_cosine_distance_f16
+                 arm_euclidean_distance_f16:test_euclidean_distance_f16
+                 arm_jensenshannon_distance_f16:test_jensenshannon_distance_f16
+                 arm_minkowski_distance_f16:test_minkowski_distance_f16
+              }
+           }
+        }
+
         group Filtering Tests {
            class = FilteringTests
            folder = Filtering