1 files changed, 274 insertions, 0 deletions
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c
new file mode 100644
index 0000000..4241eb5
--- /dev/null
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c
@@ -0,0 +1,274 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_absmax_f16.c
+ * Description:  Maximum value of a absolute values of a floating-point vector
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/statistics_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
+#include <limits.h>
+#endif
+
+/**
+  @ingroup groupStats
+ */
+
+
+/**
+  @addtogroup AbsMax
+  @{
+ */
+
+/**
+  @brief         Maximum value of absolute values of a floating-point vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector
+  @param[out]    pResult    maximum value returned here
+  @param[out]    pIndex     index of maximum value returned here
+  @return        none
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+void arm_absmax_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{
+      uint16_t        blkCnt;           /* loop counters */
+    f16x8_t       vecSrc;
+    float16_t const *pSrcVec;
+    f16x8_t       curExtremValVec = vdupq_n_f16(F16_ABSMIN);
+    float16_t       maxValue = F16_ABSMIN;
+    uint16_t        idx = blockSize;
+    uint16x8_t    indexVec;
+    uint16x8_t    curExtremIdxVec;
+    mve_pred16_t    p0;
+
+
+    indexVec = vidupq_u16((uint32_t)0, 1);
+    curExtremIdxVec = vdupq_n_u16(0);
+
+    pSrcVec = (float16_t const *) pSrc;
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U)
+    {
+        vecSrc = vldrhq_f16(pSrcVec);  
+        pSrcVec += 8;
+        vecSrc = vabsq(vecSrc);
+        /*
+         * Get current max per lane and current index per lane
+         * when a max is selected
+         */
+        p0 = vcmpgeq(vecSrc, curExtremValVec);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+
+        indexVec = indexVec +  8;
+        /*
+         * Decrement the blockSize loop counter
+         */
+        blkCnt--;
+    }
+    /*
+     * tail
+     * (will be merged thru tail predication)
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U)
+    {
+        vecSrc = vldrhq_f16(pSrcVec);  
+        pSrcVec += 8;
+        vecSrc = vabsq(vecSrc);
+
+        p0 = vctp16q(blkCnt);
+        /*
+         * Get current max per lane and current index per lane
+         * when a max is selected
+         */
+        p0 = vcmpgeq_m(vecSrc, curExtremValVec, p0);
+        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
+        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
+    }
+    /*
+     * Get max value across the vector
+     */
+    maxValue = vmaxnmvq(maxValue, curExtremValVec);
+    /*
+     * set index for lower values to max possible index
+     */
+    p0 = vcmpgeq(curExtremValVec, maxValue);
+    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
+    /*
+     * Get min index which is thus for a max value
+     */
+    idx = vminvq(idx, indexVec);
+    /*
+     * Save result
+     */
+    *pIndex = idx;
+    *pResult = maxValue;
+}
+#else
+#if defined(ARM_MATH_LOOPUNROLL)
+void arm_absmax_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{
+        float16_t cur_absmax, out;                     /* Temporary variables to store the output value. */\
+        uint32_t blkCnt, outIndex;                     /* Loop counter */                                   \
+        uint32_t index;                                /* index of maximum value */                         \
+                                                                                                            \
+  /* Initialize index value to zero. */                                                                     \
+  outIndex = 0U;                                                                                            \
+  /* Load first input value that act as reference value for comparision */                                  \
+  out = *pSrc++;                                                                                            \
+  out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out;                                                                             \
+  /* Initialize index of extrema value. */                                                                  \
+  index = 0U;                                                                                               \
+                                                                                                            \
+  /* Loop unrolling: Compute 4 outputs at a time */                                                         \
+  blkCnt = (blockSize - 1U) >> 2U;                                                                          \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    /* Initialize cur_absmax to next consecutive values one by one */                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    /* compare for the extrema value */                                                                     \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
+    {                                                                                                       \
+      /* Update the extrema value and it's index */                                                         \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 1U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 2U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 3U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                          \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = index + 4U;                                                                                \
+    }                                                                                                       \
+                                                                                                            \
+    index += 4U;                                                                                            \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Loop unrolling: Compute remaining outputs */                                                           \
+  blkCnt = (blockSize - 1U) % 4U;                                                                           \
+                                                                                                            \
+                                                                                                            \
+  while (blkCnt > 0U)                                                                                       \
+  {                                                                                                         \
+    cur_absmax = *pSrc++;                                                                                     \
+    cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;                                                                 \
+    if ((_Float16)cur_absmax > (_Float16)out)                                                                         \
+    {                                                                                                       \
+      out = cur_absmax;                                                                                       \
+      outIndex = blockSize - blkCnt;                                                                        \
+    }                                                                                                       \
+                                                                                                            \
+    /* Decrement loop counter */                                                                            \
+    blkCnt--;                                                                                               \
+  }                                                                                                         \
+                                                                                                            \
+  /* Store the extrema value and it's index into destination pointers */                                    \
+  *pResult = out;                                                                                           \
+  *pIndex = outIndex;  
+}
+#else
+void arm_absmax_f16(
+  const float16_t * pSrc,
+        uint32_t blockSize,
+        float16_t * pResult,
+        uint32_t * pIndex)
+{
+        float16_t maxVal, out;                         /* Temporary variables to store the output value. */
+        uint32_t blkCnt, outIndex;                     /* Loop counter */
+
+  /* Initialise index value to zero. */
+  outIndex = 0U;
+
+  /* Load first input value that act as reference value for comparision */
+  out = (_Float16)fabsf((float32_t)*pSrc++);
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = (blockSize - 1U);
+
+  while (blkCnt > 0U)
+  {
+    /* Initialize maxVal to the next consecutive values one by one */
+    maxVal = (_Float16)fabsf((float32_t)*pSrc++);
+
+    /* compare for the maximum value */
+    if ((_Float16)out < (_Float16)maxVal)
+    {
+      /* Update the maximum value and it's index */
+      out = maxVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Store the maximum value and it's index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#endif /* defined(ARM_MATH_LOOPUNROLL) */
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+/**
+  @} end of AbsMax group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+