summaryrefslogtreecommitdiffstats
path: root/Drivers/CMSIS/DSP/Source/StatisticsFunctions
diff options
context:
space:
mode:
Diffstat (limited to 'Drivers/CMSIS/DSP/Source/StatisticsFunctions')
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt133
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c153
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c636
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c349
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c350
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c404
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c364
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c270
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c259
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c265
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c631
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c352
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c352
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c433
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c404
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c309
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c286
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c316
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c368
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c283
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c265
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c271
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c334
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c306
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c527
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c394
-rw-r--r--Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c361
27 files changed, 4040 insertions, 5335 deletions
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt
index 480985f..1a56e61 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt
@@ -1,117 +1,16 @@
-cmake_minimum_required (VERSION 3.14)
-
-project(CMSISDSPStatistics)
-
-include(configLib)
-include(configDsp)
-
-
-
-add_library(CMSISDSPStatistics STATIC)
-
-target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_q7.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q7.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mean_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mean_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mean_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mean_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mean_q7.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_q7.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q7.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_power_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_power_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_power_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_power_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_power_q7.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_rms_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_rms_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_rms_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_std_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_std_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_std_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_std_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_var_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_var_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_var_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_var_q31.c)
-
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q7.c)
-
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q7.c)
-
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q7.c)
-
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f64.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q7.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mse_q7.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mse_q15.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mse_q31.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mse_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mse_f32.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mse_f64.c)
-
-configLib(CMSISDSPStatistics ${ROOT})
-configDsp(CMSISDSPStatistics ${ROOT})
-
-### Includes
-target_include_directories(CMSISDSPStatistics PUBLIC "${DSP}/Include")
-
-
-
-if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
-target_sources(CMSISDSPStatistics PRIVATE arm_max_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_mean_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_power_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_rms_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_std_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_var_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f16.c)
-
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f16.c)
-target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f16.c)
-endif() \ No newline at end of file
+cmake_minimum_required (VERSION 3.6)
+
+project(CMSISDSPStatistics)
+
+
+file(GLOB SRC "./*_*.c")
+
+add_library(CMSISDSPStatistics STATIC ${SRC})
+
+configdsp(CMSISDSPStatistics ..)
+
+### Includes
+target_include_directories(CMSISDSPStatistics PUBLIC "${DSP}/../../Include")
+
+
+
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c
index 5446992..ae74d02 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c
@@ -1,100 +1,53 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: StatisticsFunctions.c
- * Description: Combination of all statistics function source files.
- *
- * $Date: 16. March 2020
- * $Revision: V1.1.0
- *
- * Target Processor: Cortex-M cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "arm_entropy_f32.c"
-#include "arm_entropy_f64.c"
-#include "arm_kullback_leibler_f32.c"
-#include "arm_kullback_leibler_f64.c"
-#include "arm_logsumexp_dot_prod_f32.c"
-#include "arm_logsumexp_f32.c"
-#include "arm_max_f32.c"
-#include "arm_max_f64.c"
-#include "arm_max_q15.c"
-#include "arm_max_q31.c"
-#include "arm_max_q7.c"
-#include "arm_max_no_idx_f32.c"
-#include "arm_max_no_idx_f64.c"
-#include "arm_max_no_idx_q31.c"
-#include "arm_max_no_idx_q15.c"
-#include "arm_max_no_idx_q7.c"
-#include "arm_mean_f32.c"
-#include "arm_mean_f64.c"
-#include "arm_mean_q15.c"
-#include "arm_mean_q31.c"
-#include "arm_mean_q7.c"
-#include "arm_min_f32.c"
-#include "arm_min_f64.c"
-#include "arm_min_q15.c"
-#include "arm_min_q31.c"
-#include "arm_min_q7.c"
-#include "arm_min_no_idx_f32.c"
-#include "arm_min_no_idx_f64.c"
-#include "arm_min_no_idx_q31.c"
-#include "arm_min_no_idx_q15.c"
-#include "arm_min_no_idx_q7.c"
-#include "arm_power_f32.c"
-#include "arm_power_f64.c"
-#include "arm_power_q15.c"
-#include "arm_power_q31.c"
-#include "arm_power_q7.c"
-#include "arm_rms_f32.c"
-#include "arm_rms_q15.c"
-#include "arm_rms_q31.c"
-#include "arm_std_f32.c"
-#include "arm_std_f64.c"
-#include "arm_std_q15.c"
-#include "arm_std_q31.c"
-#include "arm_var_f32.c"
-#include "arm_var_f64.c"
-#include "arm_var_q15.c"
-#include "arm_var_q31.c"
-#include "arm_absmax_f32.c"
-#include "arm_absmax_f64.c"
-#include "arm_absmax_q15.c"
-#include "arm_absmax_q31.c"
-#include "arm_absmax_q7.c"
-#include "arm_absmin_f32.c"
-#include "arm_absmin_f64.c"
-#include "arm_absmin_q15.c"
-#include "arm_absmin_q31.c"
-#include "arm_absmin_q7.c"
-#include "arm_absmax_no_idx_f32.c"
-#include "arm_absmax_no_idx_f64.c"
-#include "arm_absmax_no_idx_q15.c"
-#include "arm_absmax_no_idx_q31.c"
-#include "arm_absmax_no_idx_q7.c"
-#include "arm_absmin_no_idx_f32.c"
-#include "arm_absmin_no_idx_f64.c"
-#include "arm_absmin_no_idx_q15.c"
-#include "arm_absmin_no_idx_q31.c"
-#include "arm_absmin_no_idx_q7.c"
-#include "arm_mse_q7.c"
-#include "arm_mse_q15.c"
-#include "arm_mse_q31.c"
-#include "arm_mse_f32.c"
-#include "arm_mse_f64.c"
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: StatisticsFunctions.c
+ * Description: Combination of all statistics function source files.
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.0.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_max_f32.c"
+#include "arm_max_q15.c"
+#include "arm_max_q31.c"
+#include "arm_max_q7.c"
+#include "arm_mean_f32.c"
+#include "arm_mean_q15.c"
+#include "arm_mean_q31.c"
+#include "arm_mean_q7.c"
+#include "arm_min_f32.c"
+#include "arm_min_q15.c"
+#include "arm_min_q31.c"
+#include "arm_min_q7.c"
+#include "arm_power_f32.c"
+#include "arm_power_q15.c"
+#include "arm_power_q31.c"
+#include "arm_power_q7.c"
+#include "arm_rms_f32.c"
+#include "arm_rms_q15.c"
+#include "arm_rms_q31.c"
+#include "arm_std_f32.c"
+#include "arm_std_q15.c"
+#include "arm_std_q31.c"
+#include "arm_var_f32.c"
+#include "arm_var_q15.c"
+#include "arm_var_q31.c"
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c
index d82b039..f6ffafe 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c
@@ -1,365 +1,271 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_max_f32.c
- * Description: Maximum value of a floating-point vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
-#include <limits.h>
-#endif
-
-/**
- @ingroup groupStats
- */
-
-/**
- @defgroup Max Maximum
-
- Computes the maximum value of an array of data.
- The function returns both the maximum value and its position within the array.
- There are separate functions for floating-point, Q31, Q15, and Q7 data types.
- */
-
-/**
- @addtogroup Max
- @{
- */
-
-/**
- @brief Maximum value of a floating-point vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult maximum value returned here
- @param[out] pIndex index of maximum value returned here
- @return none
- */
-
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_max_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult,
- uint32_t * pIndex)
-{
- uint32_t blkCnt;
- f32x4_t vecSrc;
- f32x4_t curExtremValVec = vdupq_n_f32(F32_MIN);
- float32_t maxValue = F32_MIN;
- uint32_t idx = blockSize;
- uint32x4_t indexVec;
- uint32x4_t curExtremIdxVec;
- uint32_t curIdx = 0;
- mve_pred16_t p0;
- float32_t tmp;
-
-
- indexVec = vidupq_wb_u32(&curIdx, 1);
- curExtremIdxVec = vdupq_n_u32(0);
-
- /* Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
- while (blkCnt > 0U)
- {
- vecSrc = vldrwq_f32(pSrc);
- /*
- * Get current max per lane and current index per lane
- * when a max is selected
- */
- p0 = vcmpgeq(vecSrc, curExtremValVec);
- curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
- curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
-
- indexVec = vidupq_wb_u32(&curIdx, 1);
-
- pSrc += 4;
- /* Decrement the loop counter */
- blkCnt--;
- }
-
-
- /*
- * Get max value across the vector
- */
- maxValue = vmaxnmvq(maxValue, curExtremValVec);
- /*
- * set index for lower values to max possible index
- */
- p0 = vcmpgeq(curExtremValVec, maxValue);
- indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
- /*
- * Get min index which is thus for a max value
- */
- idx = vminvq(idx, indexVec);
-
- /* Tail */
- blkCnt = blockSize & 0x3;
-
- while (blkCnt > 0U)
- {
- /* Initialize tmp to the next consecutive values one by one */
- tmp = *pSrc++;
-
- /* compare for the maximum value */
- if (maxValue < tmp)
- {
- /* Update the maximum value and it's index */
- maxValue = tmp;
- idx = blockSize - blkCnt;
- }
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /*
- * Save result
- */
- *pIndex = idx;
- *pResult = maxValue;
-}
-
-#else
-#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_max_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult,
- uint32_t * pIndex)
-{
- float32_t maxVal1, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex; /* loop counter */
-
- float32x4_t outV, srcV;
- float32x2_t outV2;
-
- uint32x4_t idxV;
- uint32x4_t maxIdx;
- static const uint32_t indexInit[4]={4,5,6,7};
- static const uint32_t countVInit[4]={0,1,2,3};
-
- uint32x4_t index;
- uint32x4_t delta;
- uint32x4_t countV;
- uint32x2_t countV2;
-
- maxIdx = vdupq_n_u32(ULONG_MAX);
- delta = vdupq_n_u32(4);
- index = vld1q_u32(indexInit);
- countV = vld1q_u32(countVInit);
-
-
- /* Initialise the index value to zero. */
- outIndex = 0U;
-
- /* Load first input value that act as reference value for comparison */
- if (blockSize <= 3)
- {
- out = *pSrc++;
-
- blkCnt = blockSize - 1;
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to the next consecutive values one by one */
- maxVal1 = *pSrc++;
-
- /* compare for the maximum value */
- if (out < maxVal1)
- {
- /* Update the maximum value and it's index */
- out = maxVal1;
- outIndex = blockSize - blkCnt;
- }
-
- /* Decrement the loop counter */
- blkCnt--;
- }
- }
- else
- {
- outV = vld1q_f32(pSrc);
- pSrc += 4;
-
- /* Compute 4 outputs at a time */
- blkCnt = (blockSize - 4 ) >> 2U;
-
- while (blkCnt > 0U)
- {
- srcV = vld1q_f32(pSrc);
- pSrc += 4;
-
- idxV = vcgtq_f32(srcV, outV);
- outV = vbslq_f32(idxV, srcV, outV );
- countV = vbslq_u32(idxV, index,countV );
-
- index = vaddq_u32(index,delta);
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- outV2 = vpmax_f32(vget_low_f32(outV),vget_high_f32(outV));
- outV2 = vpmax_f32(outV2,outV2);
- out = vget_lane_f32(outV2, 0);
-
- idxV = vceqq_f32(outV, vdupq_n_f32(out));
- countV = vbslq_u32(idxV, countV,maxIdx);
-
- countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
- countV2 = vpmin_u32(countV2,countV2);
- outIndex = vget_lane_u32(countV2,0);
-
- /* if (blockSize - 1U) is not multiple of 4 */
- blkCnt = (blockSize - 4 ) % 4U;
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to the next consecutive values one by one */
- maxVal1 = *pSrc++;
-
- /* compare for the maximum value */
- if (out < maxVal1)
- {
- /* Update the maximum value and it's index */
- out = maxVal1;
- outIndex = blockSize - blkCnt ;
- }
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
-
- }
-
- /* Store the maximum value and it's index into destination pointers */
- *pResult = out;
- *pIndex = outIndex;
-}
-#else
-void arm_max_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult,
- uint32_t * pIndex)
-{
- float32_t maxVal, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex; /* Loop counter */
-
-#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
- uint32_t index; /* index of maximum value */
-#endif
-
- /* Initialise index value to zero. */
- outIndex = 0U;
-
- /* Load first input value that act as reference value for comparision */
- out = *pSrc++;
-
-#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
- /* Initialise index of maximum value. */
- index = 0U;
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = (blockSize - 1U) >> 2U;
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to next consecutive values one by one */
- maxVal = *pSrc++;
-
- /* compare for the maximum value */
- if (out < maxVal)
- {
- /* Update the maximum value and it's index */
- out = maxVal;
- outIndex = index + 1U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 2U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 3U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 4U;
- }
-
- index += 4U;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = (blockSize - 1U) % 4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = (blockSize - 1U);
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to the next consecutive values one by one */
- maxVal = *pSrc++;
-
- /* compare for the maximum value */
- if (out < maxVal)
- {
- /* Update the maximum value and it's index */
- out = maxVal;
- outIndex = blockSize - blkCnt;
- }
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store the maximum value and it's index into destination pointers */
- *pResult = out;
- *pIndex = outIndex;
-}
-#endif /* #if defined(ARM_MATH_NEON) */
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-/**
- @} end of Max group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_f32.c
+ * Description: Maximum value of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#if defined(ARM_MATH_NEON)
+#include <limits.h>
+#endif
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup Max Maximum
+
+ Computes the maximum value of an array of data.
+ The function returns both the maximum value and its position within the array.
+ There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ @addtogroup Max
+ @{
+ */
+
+/**
+ @brief Maximum value of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult maximum value returned here
+ @param[out] pIndex index of maximum value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON)
+void arm_max_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult,
+ uint32_t * pIndex)
+{
+ float32_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ float32x4_t outV, srcV;
+ float32x2_t outV2;
+
+ uint32x4_t idxV;
+ uint32x4_t maxIdx={ULONG_MAX,ULONG_MAX,ULONG_MAX,ULONG_MAX};
+ uint32x4_t index={4,5,6,7};
+ uint32x4_t delta={4,4,4,4};
+ uint32x4_t countV={0,1,2,3};
+ uint32x2_t countV2;
+
+ /* Initialise the count value. */
+ count = 0U;
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+
+ /* Load first input value that act as reference value for comparison */
+ if (blockSize <= 3)
+ {
+ out = *pSrc++;
+
+ blkCnt = blockSize - 1;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+ }
+ else
+ {
+ outV = vld1q_f32(pSrc);
+ pSrc += 4;
+
+ /* Compute 4 outputs at a time */
+ blkCnt = (blockSize - 4 ) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ srcV = vld1q_f32(pSrc);
+ pSrc += 4;
+
+ idxV = vcgtq_f32(srcV, outV);
+ outV = vbslq_f32(idxV, srcV, outV );
+ countV = vbslq_u32(idxV, index,countV );
+
+ index = vaddq_u32(index,delta);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ outV2 = vpmax_f32(vget_low_f32(outV),vget_high_f32(outV));
+ outV2 = vpmax_f32(outV2,outV2);
+ out = outV2[0];
+
+ idxV = vceqq_f32(outV, vdupq_n_f32(out));
+ countV = vbslq_u32(idxV, countV,maxIdx);
+
+ countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
+ countV2 = vpmin_u32(countV2,countV2);
+ outIndex = countV2[0];
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 4 ) % 4U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt ;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+#else
+void arm_max_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult,
+ uint32_t * pIndex)
+{
+ float32_t maxVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal;
+ outIndex = index + 1U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 2U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 3U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+/**
+ @} end of Max group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c
index 5715e37..dbead54 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c
@@ -1,201 +1,148 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_max_q15.c
- * Description: Maximum value of a Q15 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup Max
- @{
- */
-
-/**
- @brief Maximum value of a Q15 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult maximum value returned here
- @param[out] pIndex index of maximum value returned here
- @return none
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#include "arm_helium_utils.h"
-
-void arm_max_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult,
- uint32_t * pIndex)
-{
- int32_t blkCnt; /* loop counters */
- q15x8_t extremValVec = vdupq_n_s16(Q15_MIN);
- q15_t maxValue = Q15_MIN;
- uint16x8_t indexVec;
- uint16x8_t extremIdxVec;
- mve_pred16_t p0;
- uint16_t extremIdxArr[8];
-
- indexVec = vidupq_u16(0U, 1);
-
- blkCnt = blockSize;
- do {
- mve_pred16_t p = vctp16q(blkCnt);
- q15x8_t extremIdxVal = vld1q_z_s16(pSrc, p);
- /*
- * Get current max per lane and current index per lane
- * when a max is selected
- */
- p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
-
- extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
- /* store per-lane extrema indexes */
- vst1q_p_u16(extremIdxArr, indexVec, p0);
-
- indexVec += 8;
- pSrc += 8;
- blkCnt -= 8;
- }
- while (blkCnt > 0);
-
-
- /* Get max value across the vector */
- maxValue = vmaxvq(maxValue, extremValVec);
-
- /* set index for lower values to max possible index */
- p0 = vcmpgeq(extremValVec, maxValue);
- extremIdxVec = vld1q_u16(extremIdxArr);
-
- indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0);
- *pIndex = vminvq(blockSize - 1, indexVec);
- *pResult = maxValue;
-}
-
-#else
-void arm_max_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult,
- uint32_t * pIndex)
-{
- q15_t maxVal, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex; /* Loop counter */
-
-#if defined (ARM_MATH_LOOPUNROLL)
- uint32_t index; /* index of maximum value */
-#endif
-
- /* Initialise index value to zero. */
- outIndex = 0U;
- /* Load first input value that act as reference value for comparision */
- out = *pSrc++;
-
-#if defined (ARM_MATH_LOOPUNROLL)
- /* Initialise index of maximum value. */
- index = 0U;
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = (blockSize - 1U) >> 2U;
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to next consecutive values one by one */
- maxVal = *pSrc++;
-
- /* compare for the maximum value */
- if (out < maxVal)
- {
- /* Update the maximum value and it's index */
- out = maxVal;
- outIndex = index + 1U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 2U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 3U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 4U;
- }
-
- index += 4U;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = (blockSize - 1U) % 4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = (blockSize - 1U);
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to the next consecutive values one by one */
- maxVal = *pSrc++;
-
- /* compare for the maximum value */
- if (out < maxVal)
- {
- /* Update the maximum value and it's index */
- out = maxVal;
- outIndex = blockSize - blkCnt;
- }
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store the maximum value and it's index into destination pointers */
- *pResult = out;
- *pIndex = outIndex;
-}
-#endif /* defined(ARM_MATH_MVEI) */
-/**
- @} end of Max group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_q15.c
+ * Description: Maximum value of a Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup Max
+ @{
+ */
+
+/**
+ @brief Maximum value of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult maximum value returned here
+ @param[out] pIndex index of maximum value returned here
+ @return none
+ */
+
+void arm_max_q15(
+ const q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult,
+ uint32_t * pIndex)
+{
+ q15_t maxVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+ /* Load the first input value, which acts as the reference value for comparison */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal;
+ outIndex = index + 1U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 2U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 3U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Max group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c
index fed900b..5c6badf 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c
@@ -1,202 +1,148 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_max_q31.c
- * Description: Maximum value of a Q31 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup Max
- @{
- */
-
-/**
- @brief Maximum value of a Q31 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult maximum value returned here
- @param[out] pIndex index of maximum value returned here
- @return none
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#include "arm_helium_utils.h"
-
-void arm_max_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult,
- uint32_t * pIndex)
-{
- int32_t blkCnt; /* loop counters */
- q31x4_t extremValVec = vdupq_n_s32(Q31_MIN);
- q31_t maxValue = Q31_MIN;
- uint32x4_t indexVec;
- uint32x4_t extremIdxVec;
- mve_pred16_t p0;
- uint32_t extremIdxArr[4];
-
- indexVec = vidupq_u32(0U, 1);
-
- blkCnt = blockSize;
- do {
- mve_pred16_t p = vctp32q(blkCnt);
- q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p);
- /*
- * Get current max per lane and current index per lane
- * when a max is selected
- */
- p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
-
- extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
- /* store per-lane extrema indexes */
- vst1q_p_u32(extremIdxArr, indexVec, p0);
-
- indexVec += 4;
- pSrc += 4;
- blkCnt -= 4;
- }
- while (blkCnt > 0);
-
-
- /* Get max value across the vector */
- maxValue = vmaxvq(maxValue, extremValVec);
-
- /* set index for lower values to max possible index */
- p0 = vcmpgeq(extremValVec, maxValue);
- extremIdxVec = vld1q_u32(extremIdxArr);
-
- indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0);
- *pIndex = vminvq(blockSize - 1, indexVec);
- *pResult = maxValue;
-}
-
-#else
-void arm_max_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult,
- uint32_t * pIndex)
-{
- q31_t maxVal, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex; /* Loop counter */
-
-#if defined (ARM_MATH_LOOPUNROLL)
- uint32_t index; /* index of maximum value */
-#endif
-
- /* Initialise index value to zero. */
- outIndex = 0U;
- /* Load first input value that act as reference value for comparision */
- out = *pSrc++;
-
-#if defined (ARM_MATH_LOOPUNROLL)
- /* Initialise index of maximum value. */
- index = 0U;
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = (blockSize - 1U) >> 2U;
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to next consecutive values one by one */
- maxVal = *pSrc++;
-
- /* compare for the maximum value */
- if (out < maxVal)
- {
- /* Update the maximum value and it's index */
- out = maxVal;
- outIndex = index + 1U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 2U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 3U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 4U;
- }
-
- index += 4U;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = (blockSize - 1U) % 4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = (blockSize - 1U);
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to the next consecutive values one by one */
- maxVal = *pSrc++;
-
- /* compare for the maximum value */
- if (out < maxVal)
- {
- /* Update the maximum value and it's index */
- out = maxVal;
- outIndex = blockSize - blkCnt;
- }
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store the maximum value and it's index into destination pointers */
- *pResult = out;
- *pIndex = outIndex;
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of Max group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_q31.c
+ * Description: Maximum value of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup Max
+ @{
+ */
+
+/**
+ @brief Maximum value of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult maximum value returned here
+ @param[out] pIndex index of maximum value returned here
+ @return none
+ */
+
+void arm_max_q31(
+ const q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult,
+ uint32_t * pIndex)
+{
+ q31_t maxVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+ /* Load the first input value, which acts as the reference value for comparison */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal;
+ outIndex = index + 1U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 2U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 3U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Max group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c
index 5deae64..d4977e5 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c
@@ -1,256 +1,148 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_max_q7.c
- * Description: Maximum value of a Q7 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup Max
- @{
- */
-
-/**
- @brief Maximum value of a Q7 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult maximum value returned here
- @param[out] pIndex index of maximum value returned here
- @return none
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#include "arm_helium_utils.h"
-
-static void arm_small_blk_max_q7(
- const q7_t * pSrc,
- uint16_t blockSize,
- q7_t * pResult,
- uint32_t * pIndex)
-{
- int32_t blkCnt; /* loop counters */
- q7x16_t extremValVec = vdupq_n_s8(Q7_MIN);
- q7_t maxValue = Q7_MIN;
- uint8x16_t indexVec;
- uint8x16_t extremIdxVec;
- mve_pred16_t p0;
- uint8_t extremIdxArr[16];
-
- indexVec = vidupq_u8(0U, 1);
-
- blkCnt = blockSize;
- do {
- mve_pred16_t p = vctp8q(blkCnt);
- q7x16_t extremIdxVal = vld1q_z_s8(pSrc, p);
- /*
- * Get current max per lane and current index per lane
- * when a max is selected
- */
- p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
-
- extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
- /* store per-lane extrema indexes */
- vst1q_p_u8(extremIdxArr, indexVec, p0);
-
- indexVec += 16;
- pSrc += 16;
- blkCnt -= 16;
- }
- while (blkCnt > 0);
-
-
- /* Get max value across the vector */
- maxValue = vmaxvq(maxValue, extremValVec);
-
- /* set index for lower values to max possible index */
- p0 = vcmpgeq(extremValVec, maxValue);
- extremIdxVec = vld1q_u8(extremIdxArr);
-
- indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
- *pIndex = vminvq_u8(blockSize - 1, indexVec);
- *pResult = maxValue;
-}
-
-void arm_max_q7(
- const q7_t * pSrc,
- uint32_t blockSize,
- q7_t * pResult,
- uint32_t * pIndex)
-{
- int32_t totalSize = blockSize;
- const uint16_t sub_blk_sz = UINT8_MAX + 1;
-
- if (totalSize <= sub_blk_sz)
- {
- arm_small_blk_max_q7(pSrc, blockSize, pResult, pIndex);
- }
- else
- {
- uint32_t curIdx = 0;
- q7_t curBlkExtr = Q7_MIN;
- uint32_t curBlkPos = 0;
- uint32_t curBlkIdx = 0;
- /*
- * process blocks of 255 elts
- */
- while (totalSize >= sub_blk_sz)
- {
- const q7_t *curSrc = pSrc;
-
- arm_small_blk_max_q7(curSrc, sub_blk_sz, pResult, pIndex);
- if (*pResult > curBlkExtr)
- {
- /*
- * update partial extrema
- */
- curBlkExtr = *pResult;
- curBlkPos = *pIndex;
- curBlkIdx = curIdx;
- }
- curIdx++;
- pSrc += sub_blk_sz;
- totalSize -= sub_blk_sz;
- }
- /*
- * remainder
- */
- arm_small_blk_max_q7(pSrc, totalSize, pResult, pIndex);
- if (*pResult > curBlkExtr)
- {
- curBlkExtr = *pResult;
- curBlkPos = *pIndex;
- curBlkIdx = curIdx;
- }
- *pIndex = curBlkIdx * sub_blk_sz + curBlkPos;
- *pResult = curBlkExtr;
- }
-}
-#else
-void arm_max_q7(
- const q7_t * pSrc,
- uint32_t blockSize,
- q7_t * pResult,
- uint32_t * pIndex)
-{
- q7_t maxVal, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex; /* Loop counter */
-
-#if defined (ARM_MATH_LOOPUNROLL)
- uint32_t index; /* index of maximum value */
-#endif
-
- /* Initialise index value to zero. */
- outIndex = 0U;
- /* Load first input value that act as reference value for comparision */
- out = *pSrc++;
-
-#if defined (ARM_MATH_LOOPUNROLL)
- /* Initialise index of maximum value. */
- index = 0U;
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = (blockSize - 1U) >> 2U;
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to next consecutive values one by one */
- maxVal = *pSrc++;
-
- /* compare for the maximum value */
- if (out < maxVal)
- {
- /* Update the maximum value and it's index */
- out = maxVal;
- outIndex = index + 1U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 2U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 3U;
- }
-
- maxVal = *pSrc++;
- if (out < maxVal)
- {
- out = maxVal;
- outIndex = index + 4U;
- }
-
- index += 4U;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = (blockSize - 1U) % 4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = (blockSize - 1U);
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to the next consecutive values one by one */
- maxVal = *pSrc++;
-
- /* compare for the maximum value */
- if (out < maxVal)
- {
- /* Update the maximum value and it's index */
- out = maxVal;
- outIndex = blockSize - blkCnt;
- }
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store the maximum value and it's index into destination pointers */
- *pResult = out;
- *pIndex = outIndex;
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of Max group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_q7.c
+ * Description: Maximum value of a Q7 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup Max
+ @{
+ */
+
+/**
+ @brief Maximum value of a Q7 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult maximum value returned here
+ @param[out] pIndex index of maximum value returned here
+ @return none
+ */
+
+void arm_max_q7(
+ const q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult,
+ uint32_t * pIndex)
+{
+ q7_t maxVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+ /* Load the first input value, which acts as the reference value for comparison */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal;
+ outIndex = index + 1U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 2U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 3U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Max group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c
index dd6d817..3373f82 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c
@@ -1,198 +1,166 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_mean_f32.c
- * Description: Mean value of a floating-point vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-
-/**
- @addtogroup mean
- @{
- */
-
-/**
- @brief Mean value of a floating-point vector.
- @param[in] pSrc points to the input vector.
- @param[in] blockSize number of samples in input vector.
- @param[out] pResult mean value returned here.
- @return none
- */
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#include "arm_helium_utils.h"
-
-void arm_mean_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- f32x4_t vecSrc;
- f32x4_t sumVec = vdupq_n_f32(0.0f);
- float32_t sum = 0.0f;
-
- /* Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
- while (blkCnt > 0U)
- {
- vecSrc = vldrwq_f32(pSrc);
- sumVec = vaddq_f32(sumVec, vecSrc);
-
- blkCnt --;
- pSrc += 4;
- }
-
- sum = vecAddAcrossF32Mve(sumVec);
-
- /* Tail */
- blkCnt = blockSize & 0x3;
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- *pResult = sum / (float32_t) blockSize;
-}
-
-
-#else
-#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_mean_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- float32_t sum = 0.0f; /* Temporary result storage */
- float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
- float32x2_t sumV2;
-
- uint32_t blkCnt; /* Loop counter */
-
- float32x4_t inV;
-
- blkCnt = blockSize >> 2U;
-
- /* Compute 4 outputs at a time.
- ** a second loop below computes the remaining 1 to 3 samples. */
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- inV = vld1q_f32(pSrc);
- sumV = vaddq_f32(sumV, inV);
-
- pSrc += 4;
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
- sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
-
- /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
- ** No loop unrolling is used. */
- blkCnt = blockSize & 3;
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
- /* Store the result to the destination */
- *pResult = sum / (float32_t) blockSize;
-}
-#else
-void arm_mean_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- float32_t sum = 0.0f; /* Temporary result storage */
-
-#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
-
- sum += *pSrc++;
-
- sum += *pSrc++;
-
- sum += *pSrc++;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
- /* Store result to destination */
- *pResult = (sum / blockSize);
-}
-#endif /* #if defined(ARM_MATH_NEON) */
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-/**
- @} end of mean group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_f32.c
+ * Description: Mean value of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup mean Mean
+
+ Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
+ The following algorithm is used:
+
+ <pre>
+ Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
+ </pre>
+
+ There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ @addtogroup mean
+ @{
+ */
+
+/**
+ @brief Mean value of a floating-point vector.
+ @param[in] pSrc points to the input vector.
+ @param[in] blockSize number of samples in input vector.
+ @param[out] pResult mean value returned here.
+ @return none
+ */
+#if defined(ARM_MATH_NEON_EXPERIMENTAL)
+void arm_mean_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* Temporary result storage */
+ float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
+ float32x2_t sumV2;
+
+ uint32_t blkCnt; /* Loop counter */
+
+ float32_t in1, in2, in3, in4;
+ float32x4_t inV;
+
+ blkCnt = blockSize >> 2U;
+
+ /* Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ inV = vld1q_f32(pSrc);
+ sumV = vaddq_f32(sumV, inV);
+
+ pSrc += 4;
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
+ sum = sumV2[0] + sumV2[1];
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize & 3;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store the result to the destination */
+ *pResult = sum / (float32_t) blockSize;
+}
+#else
+void arm_mean_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ float32_t sum = 0.0f; /* Temporary result storage */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ sum += *pSrc++;
+
+ sum += *pSrc++;
+
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store result to destination */
+ *pResult = (sum / blockSize);
+}
+#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
+
+/**
+ @} end of mean group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c
index 54949a1..09e6da9 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c
@@ -1,156 +1,114 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_mean_q15.c
- * Description: Mean value of a Q15 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup mean
- @{
- */
-
-/**
- @brief Mean value of a Q15 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult mean value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using a 32-bit internal accumulator.
- The input is represented in 1.15 format and is accumulated in a 32-bit
- accumulator in 17.15 format.
- There is no risk of internal overflow with this approach, and the
- full precision of intermediate result is preserved.
- Finally, the accumulator is truncated to yield a result of 1.15 format.
- */
-
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_mean_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- q15x8_t vecSrc;
- q31_t sum = 0L;
-
- /* Compute 8 outputs at a time */
- blkCnt = blockSize >> 3U;
- while (blkCnt > 0U)
- {
- vecSrc = vldrhq_s16(pSrc);
- /*
- * sum lanes
- */
- sum = vaddvaq(sum, vecSrc);
-
- blkCnt--;
- pSrc += 8;
- }
-
- /* Tail */
- blkCnt = blockSize & 0x7;
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
- /* Store the result to the destination */
- *pResult = (q15_t) (sum / (int32_t) blockSize);
-}
-#else
-void arm_mean_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q31_t sum = 0; /* Temporary result storage */
-
-#if defined (ARM_MATH_LOOPUNROLL)
- q31_t in;
-#endif
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- in = read_q15x2_ia (&pSrc);
- sum += ((in << 16U) >> 16U);
- sum += (in >> 16U);
-
- in = read_q15x2_ia (&pSrc);
- sum += ((in << 16U) >> 16U);
- sum += (in >> 16U);
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
- /* Store result to destination */
- *pResult = (q15_t) (sum / (int32_t) blockSize);
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of mean group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_q15.c
+ * Description: Mean value of a Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup mean
+ @{
+ */
+
+/**
+ @brief Mean value of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult mean value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 32-bit internal accumulator.
+ The input is represented in 1.15 format and is accumulated in a 32-bit
+ accumulator in 17.15 format.
+ There is no risk of internal overflow with this approach, and the
+ full precision of intermediate result is preserved.
+ Finally, the accumulator is truncated to yield a result of 1.15 format.
+ */
+
+void arm_mean_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+  q31_t acc = 0;          /* Running sum of the samples */
+  uint32_t remaining;     /* Samples still to be added one at a time */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  q31_t packed;           /* Word returned by read_q15x2_ia() */
+  uint32_t quads;         /* Complete groups of four samples */
+
+  /*
+   * Unrolled path: four samples per iteration, fetched as two words.
+   * read_q15x2_ia() is defined outside this file; it is given the address
+   * of pSrc, presumably so it can advance the pointer past the two samples
+   * it returns - confirm against its definition.
+   */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    /* First word: add the low half (shifted up, then back down), then the high half */
+    packed = read_q15x2_ia ((q15_t **) &pSrc);
+    acc += ((packed << 16U) >> 16U);
+    acc += (packed >> 16U);
+
+    /* Second word: same treatment */
+    packed = read_q15x2_ia ((q15_t **) &pSrc);
+    acc += ((packed << 16U) >> 16U);
+    acc += (packed >> 16U);
+  }
+
+  /* 0 to 3 samples are left over after the unrolled loop */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  /* Add whatever has not been summed yet */
+  for (; remaining > 0U; remaining--)
+  {
+    acc += *pSrc;
+    pSrc++;
+  }
+
+  /*
+   * C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize
+   * blockSize is cast to int32_t so the divide is signed; the quotient is
+   * then narrowed to q15_t. No check is made here for blockSize == 0.
+   */
+  *pResult = (q15_t) (acc / (int32_t) blockSize);
+}
+
+/**
+ @} end of mean group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c
index b33ed00..7667ad8 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c
@@ -1,149 +1,110 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_mean_q31.c
- * Description: Mean value of a Q31 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup mean
- @{
- */
-
-/**
- @brief Mean value of a Q31 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult mean value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using a 64-bit internal accumulator.
- The input is represented in 1.31 format and is accumulated in a 64-bit
- accumulator in 33.31 format.
- There is no risk of internal overflow with this approach, and the
- full precision of intermediate result is preserved.
- Finally, the accumulator is truncated to yield a result of 1.31 format.
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_mean_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- q31x4_t vecSrc;
- q63_t sum = 0LL;
-
-
- /* Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
- while (blkCnt > 0U)
- {
-
- vecSrc = vldrwq_s32(pSrc);
- /*
- * sum lanes
- */
- sum = vaddlvaq(sum, vecSrc);
-
- blkCnt --;
- pSrc += 4;
- }
-
- /* Tail */
- blkCnt = blockSize & 0x3;
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
- blkCnt --;
- }
-
- *pResult = arm_div_q63_to_q31(sum, blockSize);
-}
-#else
-void arm_mean_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q63_t sum = 0; /* Temporary result storage */
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
-
- sum += *pSrc++;
-
- sum += *pSrc++;
-
- sum += *pSrc++;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
- /* Store result to destination */
- *pResult = (q31_t) (sum / blockSize);
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of mean group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_q31.c
+ * Description: Mean value of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup mean
+ @{
+ */
+
+/**
+ @brief Mean value of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult mean value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.31 format and is accumulated in a 64-bit
+ accumulator in 33.31 format.
+ There is no risk of internal overflow with this approach, and the
+ full precision of intermediate result is preserved.
+ Finally, the accumulator is truncated to yield a result of 1.31 format.
+ */
+
+void arm_mean_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+  q63_t acc = 0;          /* Running sum of the samples */
+  uint32_t remaining;     /* Samples still to be added one at a time */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  uint32_t quads;         /* Complete groups of four samples */
+
+  /*
+   * Unrolled path: four samples per iteration, added in input order
+   * through indexed reads before the pointer is moved on by four
+   * (the same order the one-at-a-time loop below uses).
+   */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    acc += pSrc[0];
+    acc += pSrc[1];
+    acc += pSrc[2];
+    acc += pSrc[3];
+
+    pSrc += 4;
+  }
+
+  /* 0 to 3 samples are left over after the unrolled loop */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  /* Add whatever has not been summed yet */
+  for (; remaining > 0U; remaining--)
+  {
+    acc += *pSrc;
+    pSrc++;
+  }
+
+  /*
+   * C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize
+   * The divide is applied to the accumulator directly and the quotient is
+   * then narrowed to q31_t before being stored through pResult.
+   * No check is made here for blockSize == 0.
+   */
+  *pResult = (q31_t) (acc / blockSize);
+}
+
+/**
+ @} end of mean group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c
index f0701eb..3f90ea8 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c
@@ -1,153 +1,112 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_mean_q7.c
- * Description: Mean value of a Q7 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup mean
- @{
- */
-
-/**
- @brief Mean value of a Q7 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult mean value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using a 32-bit internal accumulator.
- The input is represented in 1.7 format and is accumulated in a 32-bit
- accumulator in 25.7 format.
- There is no risk of internal overflow with this approach, and the
- full precision of intermediate result is preserved.
- Finally, the accumulator is truncated to yield a result of 1.7 format.
- */
-
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-void arm_mean_q7(
- const q7_t * pSrc,
- uint32_t blockSize,
- q7_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- q7x16_t vecSrc;
- q31_t sum = 0L;
-
-
- blkCnt = blockSize >> 4;
- while (blkCnt > 0U)
- {
- vecSrc = vldrbq_s8(pSrc);
- /*
- * sum lanes
- */
- sum = vaddvaq(sum, vecSrc);
-
- blkCnt--;
- pSrc += 16;
- }
-
- blkCnt = blockSize & 0xF;
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
- /* Store the result to the destination */
- *pResult = (q7_t) (sum / (int32_t) blockSize);
-}
-#else
-void arm_mean_q7(
- const q7_t * pSrc,
- uint32_t blockSize,
- q7_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q31_t sum = 0; /* Temporary result storage */
-
-#if defined (ARM_MATH_LOOPUNROLL)
- q31_t in;
-#endif
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- in = read_q7x4_ia (&pSrc);
- sum += ((in << 24U) >> 24U);
- sum += ((in << 16U) >> 24U);
- sum += ((in << 8U) >> 24U);
- sum += (in >> 24U);
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
- sum += *pSrc++;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
- /* Store result to destination */
- *pResult = (q7_t) (sum / (int32_t) blockSize);
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of mean group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_q7.c
+ * Description: Mean value of a Q7 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup mean
+ @{
+ */
+
+/**
+ @brief Mean value of a Q7 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult mean value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 32-bit internal accumulator.
+ The input is represented in 1.7 format and is accumulated in a 32-bit
+ accumulator in 25.7 format.
+ There is no risk of internal overflow with this approach, and the
+ full precision of intermediate result is preserved.
+ Finally, the accumulator is truncated to yield a result of 1.7 format.
+ */
+
+void arm_mean_q7(
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult)
+{
+  q31_t acc = 0;          /* Running sum of the samples */
+  uint32_t remaining;     /* Samples still to be added one at a time */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  q31_t packed;           /* Word returned by read_q7x4_ia() */
+  uint32_t quads;         /* Complete groups of four samples */
+
+  /*
+   * Unrolled path: four samples per iteration, fetched as one word.
+   * read_q7x4_ia() is defined outside this file; it is given the address
+   * of pSrc, presumably so it can advance the pointer past the four
+   * samples it returns - confirm against its definition.
+   */
+  for (quads = blockSize >> 2U; quads > 0U; quads--)
+  {
+    packed = read_q7x4_ia ((q7_t **) &pSrc);
+
+    /* Add each byte of the word: shift it to the top, then back down by 24 */
+    acc += ((packed << 24U) >> 24U);
+    acc += ((packed << 16U) >> 24U);
+    acc += ((packed << 8U) >> 24U);
+    acc += (packed >> 24U);
+  }
+
+  /* 0 to 3 samples are left over after the unrolled loop */
+  remaining = blockSize & 0x3U;
+
+#else
+
+  /* No unrolling: every sample goes through the loop below */
+  remaining = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  /* Add whatever has not been summed yet */
+  for (; remaining > 0U; remaining--)
+  {
+    acc += *pSrc;
+    pSrc++;
+  }
+
+  /*
+   * C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize
+   * blockSize is cast to int32_t so the divide is signed; the quotient is
+   * then narrowed to q7_t. No check is made here for blockSize == 0.
+   */
+  *pResult = (q7_t) (acc / (int32_t) blockSize);
+}
+
+/**
+ @} end of mean group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c
index ad8a472..f6504aa 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c
@@ -1,363 +1,268 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_min_f32.c
- * Description: Minimum value of a floating-point vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
-#include <limits.h>
-#endif
-
-
-/**
- @ingroup groupStats
- */
-
-/**
- @defgroup Min Minimum
-
- Computes the minimum value of an array of data.
- The function returns both the minimum value and its position within the array.
- There are separate functions for floating-point, Q31, Q15, and Q7 data types.
- */
-
-/**
- @addtogroup Min
- @{
- */
-
-/**
- @brief Minimum value of a floating-point vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult minimum value returned here
- @param[out] pIndex index of minimum value returned here
- @return none
- */
-
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-void arm_min_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult,
- uint32_t * pIndex)
-{
- uint32_t blkCnt; /* loop counters */
- f32x4_t vecSrc;
- float32_t const *pSrcVec;
- f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX);
- float32_t minValue = F32_MAX;
- uint32_t idx = blockSize;
- uint32x4_t indexVec;
- uint32x4_t curExtremIdxVec;
- float32_t tmp;
- mve_pred16_t p0;
-
- indexVec = vidupq_u32((uint32_t)0, 1);
- curExtremIdxVec = vdupq_n_u32(0);
-
- pSrcVec = (float32_t const *) pSrc;
- /* Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
- while (blkCnt > 0U)
- {
- vecSrc = vldrwq_f32(pSrcVec);
- pSrcVec += 4;
- /*
- * Get current max per lane and current index per lane
- * when a max is selected
- */
- p0 = vcmpleq(vecSrc, curExtremValVec);
- curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
- curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
-
- indexVec = indexVec + 4;
- /*
- * Decrement the blockSize loop counter
- */
- blkCnt--;
- }
-
- /*
- * Get min value across the vector
- */
- minValue = vminnmvq(minValue, curExtremValVec);
- /*
- * set index for lower values to max possible index
- */
- p0 = vcmpleq(curExtremValVec, minValue);
- indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
- /*
- * Get min index which is thus for a max value
- */
- idx = vminvq(idx, indexVec);
-
- /*
- * tail
- */
- blkCnt = blockSize & 0x3;
-
- while (blkCnt > 0U)
- {
- /* Initialize minVal to the next consecutive values one by one */
- tmp = *pSrc++;
-
- /* compare for the minimum value */
- if (minValue > tmp)
- {
- /* Update the minimum value and it's index */
- minValue = tmp;
- idx = blockSize - blkCnt;
- }
- blkCnt--;
- }
- /*
- * Save result
- */
- *pIndex = idx;
- *pResult = minValue;
-}
-
-#else
-#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_min_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult,
- uint32_t * pIndex)
-{
- float32_t maxVal1, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex; /* loop counter */
-
- float32x4_t outV, srcV;
- float32x2_t outV2;
-
- uint32x4_t idxV;
- static const uint32_t indexInit[4]={4,5,6,7};
- static const uint32_t countVInit[4]={0,1,2,3};
- uint32x4_t maxIdx;
- uint32x4_t index;
- uint32x4_t delta;
- uint32x4_t countV;
- uint32x2_t countV2;
-
- maxIdx = vdupq_n_u32(ULONG_MAX);
- delta = vdupq_n_u32(4);
- index = vld1q_u32(indexInit);
- countV = vld1q_u32(countVInit);
-
- /* Initialise the index value to zero. */
- outIndex = 0U;
-
- /* Load first input value that act as reference value for comparison */
- if (blockSize <= 3)
- {
- out = *pSrc++;
-
- blkCnt = blockSize - 1;
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to the next consecutive values one by one */
- maxVal1 = *pSrc++;
-
- /* compare for the maximum value */
- if (out > maxVal1)
- {
- /* Update the maximum value and it's index */
- out = maxVal1;
- outIndex = blockSize - blkCnt;
- }
-
- /* Decrement the loop counter */
- blkCnt--;
- }
- }
- else
- {
- outV = vld1q_f32(pSrc);
- pSrc += 4;
-
- /* Compute 4 outputs at a time */
- blkCnt = (blockSize - 4 ) >> 2U;
-
- while (blkCnt > 0U)
- {
- srcV = vld1q_f32(pSrc);
- pSrc += 4;
-
- idxV = vcltq_f32(srcV, outV);
- outV = vbslq_f32(idxV, srcV, outV );
- countV = vbslq_u32(idxV, index,countV );
-
- index = vaddq_u32(index,delta);
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- outV2 = vpmin_f32(vget_low_f32(outV),vget_high_f32(outV));
- outV2 = vpmin_f32(outV2,outV2);
- out = vget_lane_f32(outV2,0);
-
- idxV = vceqq_f32(outV, vdupq_n_f32(out));
- countV = vbslq_u32(idxV, countV,maxIdx);
-
- countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
- countV2 = vpmin_u32(countV2,countV2);
- outIndex = vget_lane_u32(countV2,0);
-
- /* if (blockSize - 1U) is not multiple of 4 */
- blkCnt = (blockSize - 4 ) % 4U;
-
- while (blkCnt > 0U)
- {
- /* Initialize maxVal to the next consecutive values one by one */
- maxVal1 = *pSrc++;
-
- /* compare for the maximum value */
- if (out > maxVal1)
- {
- /* Update the maximum value and it's index */
- out = maxVal1;
- outIndex = blockSize - blkCnt ;
- }
-
- /* Decrement the loop counter */
- blkCnt--;
- }
- }
-
- /* Store the maximum value and it's index into destination pointers */
- *pResult = out;
- *pIndex = outIndex;
-}
-#else
-void arm_min_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult,
- uint32_t * pIndex)
-{
- float32_t minVal, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex; /* Loop counter */
-
-#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
- uint32_t index; /* index of maximum value */
-#endif
-
- /* Initialise index value to zero. */
- outIndex = 0U;
-
- /* Load first input value that act as reference value for comparision */
- out = *pSrc++;
-
-#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
- /* Initialise index of maximum value. */
- index = 0U;
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = (blockSize - 1U) >> 2U;
-
- while (blkCnt > 0U)
- {
- /* Initialize minVal to next consecutive values one by one */
- minVal = *pSrc++;
-
- /* compare for the minimum value */
- if (out > minVal)
- {
- /* Update the minimum value and it's index */
- out = minVal;
- outIndex = index + 1U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 2U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 3U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 4U;
- }
-
- index += 4U;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = (blockSize - 1U) % 4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = (blockSize - 1U);
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* Initialize minVal to the next consecutive values one by one */
- minVal = *pSrc++;
-
- /* compare for the minimum value */
- if (out > minVal)
- {
- /* Update the minimum value and it's index */
- out = minVal;
- outIndex = blockSize - blkCnt;
- }
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store the minimum value and it's index into destination pointers */
- *pResult = out;
- *pIndex = outIndex;
-}
-#endif /* #if defined(ARM_MATH_NEON) */
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-/**
- @} end of Min group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_f32.c
+ * Description: Minimum value of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include <limits.h>
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup Min Minimum
+
+ Computes the minimum value of an array of data.
+ The function returns both the minimum value and its position within the array.
+ There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ @addtogroup Min
+ @{
+ */
+
+/**
+ @brief Minimum value of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult minimum value returned here
+ @param[out] pIndex index of minimum value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON)
+void arm_min_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex)
+{
+  /* Lane constants live in arrays: brace-initialised vector objects are a compiler extension */
+  static const uint32_t indexInit[4] = {4U, 5U, 6U, 7U};
+  static const uint32_t countVInit[4] = {0U, 1U, 2U, 3U};
+
+  float32_t minVal1, out;       /* Current sample and running minimum */
+  uint32_t blkCnt, outIndex;    /* Loop counter and index of the minimum */
+
+  float32x4_t outV, srcV;       /* Per-lane minima and freshly loaded samples */
+  float32x2_t outV2;            /* Scratch for the horizontal minimum */
+  uint32x4_t idxV;              /* Lane mask produced by the compares */
+  uint32x4_t maxIdx;            /* Largest lane value: loses every index reduction */
+  uint32x4_t index;             /* Positions of the samples held in srcV */
+  uint32x4_t delta;             /* Per-iteration increment of 'index' */
+  uint32x4_t countV;            /* Per-lane position of the value held in outV */
+  uint32x2_t countV2;           /* Scratch for the horizontal index minimum */
+
+  /* UINT32_MAX matches the 32-bit lanes; ULONG_MAX is wider than a lane on LP64 */
+  maxIdx = vdupq_n_u32(UINT32_MAX);
+  delta = vdupq_n_u32(4U);
+  index = vld1q_u32(indexInit);
+  countV = vld1q_u32(countVInit);
+
+  outIndex = 0U;                /* Position 0 until something smaller is found */
+
+  if (blockSize <= 3)
+  {
+    /* Too few samples for a vector load: plain scalar scan */
+    out = *pSrc++;
+
+    blkCnt = blockSize - 1;
+
+    while (blkCnt > 0U)
+    {
+      minVal1 = *pSrc++;
+
+      /* Strict compare: on equal values the earlier index is kept */
+      if (out > minVal1)
+      {
+        out = minVal1;
+        outIndex = blockSize - blkCnt;
+      }
+
+      blkCnt--;
+    }
+  }
+  else
+  {
+    /* Seed the four lane minima with samples 0..3 */
+    outV = vld1q_f32(pSrc);
+    pSrc += 4;
+
+    /* Compute 4 outputs at a time */
+    blkCnt = (blockSize - 4 ) >> 2U;
+
+    while (blkCnt > 0U)
+    {
+      srcV = vld1q_f32(pSrc);
+      pSrc += 4;
+
+      /* Where the new sample is smaller, take it together with its position */
+      idxV = vcltq_f32(srcV, outV);
+      outV = vbslq_f32(idxV, srcV, outV);
+      countV = vbslq_u32(idxV, index, countV);
+
+      index = vaddq_u32(index, delta);
+
+      blkCnt--;
+    }
+
+    /* Horizontal minimum of the four lanes */
+    outV2 = vpmin_f32(vget_low_f32(outV), vget_high_f32(outV));
+    outV2 = vpmin_f32(outV2, outV2);
+    out = vget_lane_f32(outV2, 0);
+
+    /* Keep positions only for lanes holding the minimum, then take the smallest */
+    idxV = vceqq_f32(outV, vdupq_n_f32(out));
+    countV = vbslq_u32(idxV, countV, maxIdx);
+    countV2 = vpmin_u32(vget_low_u32(countV), vget_high_u32(countV));
+    countV2 = vpmin_u32(countV2, countV2);
+    outIndex = vget_lane_u32(countV2, 0);
+
+    /* Scalar tail: the (blockSize - 4) % 4 samples the vector loop did not reach */
+    blkCnt = (blockSize - 4 ) % 4U;
+
+    while (blkCnt > 0U)
+    {
+      minVal1 = *pSrc++;
+
+      if (out > minVal1)
+      {
+        out = minVal1;
+        outIndex = blockSize - blkCnt;
+      }
+
+      blkCnt--;
+    }
+  }
+
+  /* Store the minimum value and its index into the destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+#else
+void arm_min_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex)
+{
+  float32_t best;         /* Smallest value seen so far */
+  float32_t cand;         /* Sample currently being compared */
+  uint32_t bestIdx;       /* Position of 'best' within the input */
+  uint32_t remaining;     /* Samples still to be examined one at a time */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+  uint32_t base;          /* Position of the last sample already examined */
+  uint32_t quads;         /* Complete groups of four samples after the first */
+#endif
+
+  /*
+   * The first sample is the initial minimum, at position 0.
+   * NOTE(review): it is read unconditionally and blockSize - 1U is used as
+   * an unsigned count below, so blockSize == 0 is not handled here.
+   */
+  bestIdx = 0U;
+  best = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+  base = 0U;
+
+  /*
+   * Unrolled path: four candidates per iteration. Every compare is strict,
+   * so when equal values occur the earliest position is the one kept.
+   */
+  for (quads = (blockSize - 1U) >> 2U; quads > 0U; quads--)
+  {
+    cand = pSrc[0];
+    if (cand < best)
+    {
+      best = cand;
+      bestIdx = base + 1U;
+    }
+
+    cand = pSrc[1];
+    if (cand < best)
+    {
+      best = cand;
+      bestIdx = base + 2U;
+    }
+
+    cand = pSrc[2];
+    if (cand < best)
+    {
+      best = cand;
+      bestIdx = base + 3U;
+    }
+
+    cand = pSrc[3];
+    if (cand < best)
+    {
+      best = cand;
+      bestIdx = base + 4U;
+    }
+
+    /* Step past the four samples just examined */
+    pSrc += 4;
+    base += 4U;
+  }
+
+  /* 0 to 3 samples are left over after the unrolled loop */
+  remaining = (blockSize - 1U) & 0x3U;
+
+#else
+
+  /* No unrolling: every sample after the first goes through the loop below */
+  remaining = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  /* Examine whatever has not been compared yet */
+  for (; remaining > 0U; remaining--)
+  {
+    cand = *pSrc;
+    pSrc++;
+
+    if (cand < best)
+    {
+      best = cand;
+
+      /* 'remaining' counts down to 1, so this is the position of 'cand' */
+      bestIdx = blockSize - remaining;
+    }
+  }
+
+  /* Hand back the minimum and where it was found */
+  *pResult = best;
+  *pIndex = bestIdx;
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+
+/**
+ @} end of Min group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c
index f31019d..67fd49e 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c
@@ -1,203 +1,149 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_min_q15.c
- * Description: Minimum value of a Q15 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-
-/**
- @addtogroup Min
- @{
- */
-
-/**
- @brief Minimum value of a Q15 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult minimum value returned here
- @param[out] pIndex index of minimum value returned here
- @return none
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#include "arm_helium_utils.h"
-
-void arm_min_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult,
- uint32_t * pIndex)
-{
-
- int32_t blkCnt; /* loop counters */
- q15x8_t extremValVec = vdupq_n_s16(Q15_MAX);
- q15_t minValue = Q15_MAX;
- uint16x8_t indexVec;
- uint16x8_t extremIdxVec;
- mve_pred16_t p0;
- uint16_t extremIdxArr[8];
-
- indexVec = vidupq_u16(0U, 1);
-
- blkCnt = blockSize;
- do {
- mve_pred16_t p = vctp16q(blkCnt);
- q15x8_t extremIdxVal = vld1q_z_s16(pSrc, p);
- /*
- * Get current min per lane and current index per lane
- * when a min is selected
- */
- p0 = vcmpleq_m(extremIdxVal, extremValVec, p);
-
- extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
- /* store per-lane extrema indexes */
- vst1q_p_u16(extremIdxArr, indexVec, p0);
-
- indexVec += 8;
- pSrc += 8;
- blkCnt -= 8;
- }
- while (blkCnt > 0);
-
- /* Get min value across the vector */
- minValue = vminvq(minValue, extremValVec);
-
- /* set index for lower values to min possible index */
- p0 = vcmpleq(extremValVec, minValue);
- extremIdxVec = vld1q_u16(extremIdxArr);
-
- indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0);
- *pIndex = vminvq(blockSize - 1, indexVec);
- *pResult = minValue;
-
-}
-#else
-void arm_min_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult,
- uint32_t * pIndex)
-{
- q15_t minVal, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex; /* Loop counter */
-
-#if defined (ARM_MATH_LOOPUNROLL)
- uint32_t index; /* index of maximum value */
-#endif
-
- /* Initialise index value to zero. */
- outIndex = 0U;
- /* Load first input value that act as reference value for comparision */
- out = *pSrc++;
-
-#if defined (ARM_MATH_LOOPUNROLL)
- /* Initialise index of maximum value. */
- index = 0U;
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = (blockSize - 1U) >> 2U;
-
- while (blkCnt > 0U)
- {
- /* Initialize minVal to next consecutive values one by one */
- minVal = *pSrc++;
-
- /* compare for the minimum value */
- if (out > minVal)
- {
- /* Update the minimum value and it's index */
- out = minVal;
- outIndex = index + 1U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 2U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 3U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 4U;
- }
-
- index += 4U;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = (blockSize - 1U) % 4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = (blockSize - 1U);
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* Initialize minVal to the next consecutive values one by one */
- minVal = *pSrc++;
-
- /* compare for the minimum value */
- if (out > minVal)
- {
- /* Update the minimum value and it's index */
- out = minVal;
- outIndex = blockSize - blkCnt;
- }
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store the minimum value and it's index into destination pointers */
- *pResult = out;
- *pIndex = outIndex;
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of Min group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_q15.c
+ * Description: Minimum value of a Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+
+/**
+ @addtogroup Min
+ @{
+ */
+
+/**
+ @brief Minimum value of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector (the first sample is read unconditionally, so at least 1 is expected)
+ @param[out] pResult minimum value returned here
+ @param[out] pIndex index of minimum value returned here (first occurrence when the minimum appears more than once)
+ @return none
+ */
+
+void arm_min_q15(
+ const q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult,
+ uint32_t * pIndex)
+{
+ q15_t minVal, out; /* minVal: sample under test; out: smallest value found so far */
+ uint32_t blkCnt, outIndex; /* blkCnt: loop counter; outIndex: index of the smallest value */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of the last sample already compared; the unrolled loop tests index + 1 .. index + 4 */
+#endif
+
+ /* Initialise the index of the minimum to zero (the first sample). */
+ outIndex = 0U;
+ /* Load the first input value, which acts as the reference value for comparison */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise the running index used by the unrolled loop. */
+ index = 0U;
+
+ /* Loop unrolling: compare 4 samples per iteration (the first sample is already consumed) */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Load the next consecutive sample into minVal */
+ minVal = *pSrc++;
+
+ /* Strict comparison: an equal value does not replace the current minimum */
+ if (out > minVal)
+ {
+ /* Update the minimum value and its index */
+ out = minVal;
+ outIndex = index + 1U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 2U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 3U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: number of samples left for the loop below (0 to 3) */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with the number of samples that follow the first one */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Load the next consecutive sample into minVal */
+ minVal = *pSrc++;
+
+ /* Strict comparison: an equal value does not replace the current minimum */
+ if (out > minVal)
+ {
+ /* Update the minimum value and its index (blockSize - blkCnt is the position of the current sample) */
+ out = minVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Min group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c
index c993004..c362173 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c
@@ -1,203 +1,149 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_min_q31.c
- * Description: Minimum value of a Q31 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-
-/**
- @addtogroup Min
- @{
- */
-
-/**
- @brief Minimum value of a Q31 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult minimum value returned here
- @param[out] pIndex index of minimum value returned here
- @return none
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#include "arm_helium_utils.h"
-
-void arm_min_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult,
- uint32_t * pIndex)
-{
- int32_t blkCnt; /* loop counters */
- q31x4_t extremValVec = vdupq_n_s32(Q31_MAX);
- q31_t minValue = Q31_MAX;
- uint32x4_t indexVec;
- uint32x4_t extremIdxVec;
- mve_pred16_t p0;
- uint32_t extremIdxArr[4];
-
- indexVec = vidupq_u32(0U, 1);
-
- blkCnt = blockSize;
- do {
- mve_pred16_t p = vctp32q(blkCnt);
- q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p);
- /*
- * Get current min per lane and current index per lane
- * when a min is selected
- */
- p0 = vcmpleq_m(extremIdxVal, extremValVec, p);
-
- extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
- /* store per-lane extrema indexes */
- vst1q_p_u32(extremIdxArr, indexVec, p0);
-
- indexVec += 4;
- pSrc += 4;
- blkCnt -= 4;
- }
- while (blkCnt > 0);
-
-
- /* Get min value across the vector */
- minValue = vminvq(minValue, extremValVec);
-
- /* set index for lower values to min possible index */
- p0 = vcmpleq(extremValVec, minValue);
- extremIdxVec = vld1q_u32(extremIdxArr);
-
- indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0);
- *pIndex = vminvq(blockSize - 1, indexVec);
- *pResult = minValue;
-}
-
-#else
-void arm_min_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult,
- uint32_t * pIndex)
-{
- q31_t minVal, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex; /* Loop counter */
-
-#if defined (ARM_MATH_LOOPUNROLL)
- uint32_t index; /* index of maximum value */
-#endif
-
- /* Initialise index value to zero. */
- outIndex = 0U;
- /* Load first input value that act as reference value for comparision */
- out = *pSrc++;
-
-#if defined (ARM_MATH_LOOPUNROLL)
- /* Initialise index of maximum value. */
- index = 0U;
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = (blockSize - 1U) >> 2U;
-
- while (blkCnt > 0U)
- {
- /* Initialize minVal to next consecutive values one by one */
- minVal = *pSrc++;
-
- /* compare for the minimum value */
- if (out > minVal)
- {
- /* Update the minimum value and it's index */
- out = minVal;
- outIndex = index + 1U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 2U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 3U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 4U;
- }
-
- index += 4U;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = (blockSize - 1U) % 4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = (blockSize - 1U);
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* Initialize minVal to the next consecutive values one by one */
- minVal = *pSrc++;
-
- /* compare for the minimum value */
- if (out > minVal)
- {
- /* Update the minimum value and it's index */
- out = minVal;
- outIndex = blockSize - blkCnt;
- }
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store the minimum value and it's index into destination pointers */
- *pResult = out;
- *pIndex = outIndex;
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of Min group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_q31.c
+ * Description: Minimum value of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+
+/**
+ @addtogroup Min
+ @{
+ */
+
+/**
+ @brief Minimum value of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector (the first sample is read unconditionally, so at least 1 is expected)
+ @param[out] pResult minimum value returned here
+ @param[out] pIndex index of minimum value returned here (first occurrence when the minimum appears more than once)
+ @return none
+ */
+
+void arm_min_q31(
+ const q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult,
+ uint32_t * pIndex)
+{
+ q31_t minVal, out; /* minVal: sample under test; out: smallest value found so far */
+ uint32_t blkCnt, outIndex; /* blkCnt: loop counter; outIndex: index of the smallest value */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of the last sample already compared; the unrolled loop tests index + 1 .. index + 4 */
+#endif
+
+ /* Initialise the index of the minimum to zero (the first sample). */
+ outIndex = 0U;
+ /* Load the first input value, which acts as the reference value for comparison */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise the running index used by the unrolled loop. */
+ index = 0U;
+
+ /* Loop unrolling: compare 4 samples per iteration (the first sample is already consumed) */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Load the next consecutive sample into minVal */
+ minVal = *pSrc++;
+
+ /* Strict comparison: an equal value does not replace the current minimum */
+ if (out > minVal)
+ {
+ /* Update the minimum value and its index */
+ out = minVal;
+ outIndex = index + 1U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 2U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 3U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: number of samples left for the loop below (0 to 3) */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with the number of samples that follow the first one */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Load the next consecutive sample into minVal */
+ minVal = *pSrc++;
+
+ /* Strict comparison: an equal value does not replace the current minimum */
+ if (out > minVal)
+ {
+ /* Update the minimum value and its index (blockSize - blkCnt is the position of the current sample) */
+ out = minVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Min group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c
index 3e5aae5..4329630 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c
@@ -1,284 +1,149 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_min_q7.c
- * Description: Minimum value of a Q7 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-
-/**
- @addtogroup Min
- @{
- */
-
-/**
- @brief Minimum value of a Q7 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult minimum value returned here
- @param[out] pIndex index of minimum value returned here
- @return none
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#include "arm_helium_utils.h"
-
-static void arm_small_blk_min_q7(
- const q7_t * pSrc,
- uint8_t blockSize,
- q7_t * pResult,
- uint32_t * pIndex)
-{
- uint32_t blkCnt; /* loop counters */
- q7x16_t vecSrc;
- q7x16_t curExtremValVec = vdupq_n_s8(Q7_MAX);
- q7_t minValue = Q7_MAX,temp;
- uint32_t idx = blockSize;
- uint8x16_t indexVec;
- uint8x16_t curExtremIdxVec;
- mve_pred16_t p0;
-
-
- indexVec = vidupq_u8((uint32_t)0, 1);
- curExtremIdxVec = vdupq_n_u8(0);
-
- blkCnt = blockSize >> 4;
- while (blkCnt > 0U)
- {
- vecSrc = vldrbq_s8(pSrc);
- pSrc += 16;
- /*
- * Get current min per lane and current index per lane
- * when a min is selected
- */
- p0 = vcmpleq(vecSrc, curExtremValVec);
- curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
- curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
-
- indexVec = indexVec + 16;
- /*
- * Decrement the blockSize loop counter
- */
- blkCnt--;
- }
-
- /*
- * Get min value across the vector
- */
- minValue = vminvq(minValue, curExtremValVec);
- /*
- * set index for lower values to min possible index
- */
- p0 = vcmpleq(curExtremValVec, minValue);
- indexVec = vpselq(curExtremIdxVec, vdupq_n_u8(blockSize), p0);
- /*
- * Get min index which is thus for a min value
- */
- idx = vminvq(idx, indexVec);
-
- blkCnt = blockSize & 0xF;
- while (blkCnt > 0U)
- {
- /* Initialize minVal to the next consecutive values one by one */
- temp = *pSrc++;
-
- /* compare for the minimum value */
- if (minValue > temp)
- {
- /* Update the minimum value and it's index */
- minValue = temp;
- idx = blockSize - blkCnt;
- }
-
- /* Decrement loop counter */
- blkCnt--;
- }
- /*
- * Save result
- */
- *pIndex = idx;
- *pResult = minValue;
-}
-
-void arm_min_q7(
- const q7_t * pSrc,
- uint32_t blockSize,
- q7_t * pResult,
- uint32_t * pIndex)
-{
- int32_t totalSize = blockSize;
-
- if (totalSize <= UINT8_MAX)
- {
- arm_small_blk_min_q7(pSrc, blockSize, pResult, pIndex);
- }
- else
- {
- uint32_t curIdx = 0;
- q7_t curBlkExtr = Q7_MAX;
- uint32_t curBlkPos = 0;
- uint32_t curBlkIdx = 0;
- /*
- * process blocks of 255 elts
- */
- while (totalSize >= UINT8_MAX)
- {
- const q7_t *curSrc = pSrc;
-
- arm_small_blk_min_q7(curSrc, UINT8_MAX, pResult, pIndex);
- if (*pResult < curBlkExtr)
- {
- /*
- * update partial extrema
- */
- curBlkExtr = *pResult;
- curBlkPos = *pIndex;
- curBlkIdx = curIdx;
- }
- curIdx++;
- pSrc += UINT8_MAX;
- totalSize -= UINT8_MAX;
- }
- /*
- * remainder
- */
- arm_small_blk_min_q7(pSrc, totalSize, pResult, pIndex);
- if (*pResult < curBlkExtr)
- {
- curBlkExtr = *pResult;
- curBlkPos = *pIndex;
- curBlkIdx = curIdx;
- }
- *pIndex = curBlkIdx * UINT8_MAX + curBlkPos;
- *pResult = curBlkExtr;
- }
-}
-#else
-void arm_min_q7(
- const q7_t * pSrc,
- uint32_t blockSize,
- q7_t * pResult,
- uint32_t * pIndex)
-{
- q7_t minVal, out; /* Temporary variables to store the output value. */
- uint32_t blkCnt, outIndex; /* Loop counter */
-
-#if defined (ARM_MATH_LOOPUNROLL)
- uint32_t index; /* index of maximum value */
-#endif
-
- /* Initialise index value to zero. */
- outIndex = 0U;
- /* Load first input value that act as reference value for comparision */
- out = *pSrc++;
-
-#if defined (ARM_MATH_LOOPUNROLL)
- /* Initialise index of maximum value. */
- index = 0U;
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = (blockSize - 1U) >> 2U;
-
- while (blkCnt > 0U)
- {
- /* Initialize minVal to next consecutive values one by one */
- minVal = *pSrc++;
-
- /* compare for the minimum value */
- if (out > minVal)
- {
- /* Update the minimum value and it's index */
- out = minVal;
- outIndex = index + 1U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 2U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 3U;
- }
-
- minVal = *pSrc++;
- if (out > minVal)
- {
- out = minVal;
- outIndex = index + 4U;
- }
-
- index += 4U;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = (blockSize - 1U) % 4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = (blockSize - 1U);
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* Initialize minVal to the next consecutive values one by one */
- minVal = *pSrc++;
-
- /* compare for the minimum value */
- if (out > minVal)
- {
- /* Update the minimum value and it's index */
- out = minVal;
- outIndex = blockSize - blkCnt;
- }
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store the minimum value and it's index into destination pointers */
- *pResult = out;
- *pIndex = outIndex;
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of Min group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_q7.c
+ * Description: Minimum value of a Q7 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+
+/**
+ @addtogroup Min
+ @{
+ */
+
+/**
+ @brief Minimum value of a Q7 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector (the first sample is read unconditionally, so at least 1 is expected)
+ @param[out] pResult minimum value returned here
+ @param[out] pIndex index of minimum value returned here (first occurrence when the minimum appears more than once)
+ @return none
+ */
+
+void arm_min_q7(
+ const q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult,
+ uint32_t * pIndex)
+{
+ q7_t minVal, out; /* minVal: sample under test; out: smallest value found so far */
+ uint32_t blkCnt, outIndex; /* blkCnt: loop counter; outIndex: index of the smallest value */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of the last sample already compared; the unrolled loop tests index + 1 .. index + 4 */
+#endif
+
+ /* Initialise the index of the minimum to zero (the first sample). */
+ outIndex = 0U;
+ /* Load the first input value, which acts as the reference value for comparison */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise the running index used by the unrolled loop. */
+ index = 0U;
+
+ /* Loop unrolling: compare 4 samples per iteration (the first sample is already consumed) */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Load the next consecutive sample into minVal */
+ minVal = *pSrc++;
+
+ /* Strict comparison: an equal value does not replace the current minimum */
+ if (out > minVal)
+ {
+ /* Update the minimum value and its index */
+ out = minVal;
+ outIndex = index + 1U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 2U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 3U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: number of samples left for the loop below (0 to 3) */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with the number of samples that follow the first one */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Load the next consecutive sample into minVal */
+ minVal = *pSrc++;
+
+ /* Strict comparison: an equal value does not replace the current minimum */
+ if (out > minVal)
+ {
+ /* Update the minimum value and its index (blockSize - blkCnt is the position of the current sample) */
+ out = minVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Min group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c
index ce12e57..abf444b 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c
@@ -1,229 +1,175 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_power_f32.c
- * Description: Sum of the squares of the elements of a floating-point vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @defgroup power Power
-
- Calculates the sum of the squares of the elements in the input vector.
- The underlying algorithm is used:
-
- <pre>
- Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];
- </pre>
-
- There are separate functions for floating point, Q31, Q15, and Q7 data types.
-
- Since the result is not divided by the length, those functions are in fact computing
- something which is more an energy than a power.
-
- */
-
-/**
- @addtogroup power
- @{
- */
-
-/**
- @brief Sum of the squares of the elements of a floating-point vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult sum of the squares value returned here
- @return none
- */
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#include "arm_helium_utils.h"
-
-void arm_power_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- f32x4_t vecSrc;
- f32x4_t sumVec = vdupq_n_f32(0.0f);
- float32_t sum = 0.0f;
- float32_t in;
-
- /* Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
- while (blkCnt > 0U)
- {
- vecSrc = vldrwq_f32(pSrc);
- /*
- * sum lanes
- */
- sumVec = vfmaq(sumVec, vecSrc, vecSrc);
-
- blkCnt --;
- pSrc += 4;
- }
- sum = vecAddAcrossF32Mve(sumVec);
-
- /*
- * tail
- */
- blkCnt = blockSize & 0x3;
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
- in = *pSrc++;
- sum += in * in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- *pResult = sum;
-}
-#else
-#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_power_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- float32_t sum = 0.0f; /* accumulator */
- float32_t in; /* Temporary variable to store input value */
- uint32_t blkCnt; /* loop counter */
-
- float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
- float32x2_t sumV2;
- float32x4_t inV;
-
- blkCnt = blockSize >> 2U;
-
- /* Compute 4 outputs at a time.
- ** a second loop below computes the remaining 1 to 3 samples. */
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
- /* Compute Power and then store the result in a temporary variable, sum. */
- inV = vld1q_f32(pSrc);
- sumV = vmlaq_f32(sumV, inV, inV);
- pSrc += 4;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
- sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
- sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
-
- /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
- ** No loop unrolling is used. */
- blkCnt = blockSize % 0x4U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
- /* compute power and then store the result in a temporary variable, sum. */
- in = *pSrc++;
- sum += in * in;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* Store the result to the destination */
- *pResult = sum;
-}
-#else
-void arm_power_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- float32_t sum = 0.0f; /* Temporary result storage */
- float32_t in; /* Temporary variable to store input value */
-
-#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
- in = *pSrc++;
- sum += in * in;
-
- in = *pSrc++;
- sum += in * in;
-
- in = *pSrc++;
- sum += in * in;
-
- in = *pSrc++;
- sum += in * in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
- in = *pSrc++;
- sum += in * in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store result to destination */
- *pResult = sum;
-}
-#endif /* #if defined(ARM_MATH_NEON) */
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-/**
- @} end of power group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_f32.c
+ * Description: Sum of the squares of the elements of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup power Power
+
+ Calculates the sum of the squares of the elements in the input vector.
+ The underlying algorithm is used:
+
+ <pre>
+ Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];
+ </pre>
+
+ There are separate functions for floating point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ @addtogroup power
+ @{
+ */
+
+/**
+ @brief Sum of the squares of the elements of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult sum of the squares value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON)
+void arm_power_f32(
+ const float32_t * pSrc, /* input vector, only read */
+ uint32_t blockSize, /* number of samples to square and sum */
+ float32_t * pResult) /* receives the sum of squares */
+{
+ float32_t sum = 0.0f; /* scalar accumulator (lane total plus tail samples) */
+ float32_t in; /* Temporary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+ float32x4_t sumV = vdupq_n_f32(0.0f); /* four per-lane partial sums, start at zero */
+ float32x2_t sumV2; /* partial sums after the pairwise add */
+ float32x4_t inV; /* four input samples */
+
+ blkCnt = blockSize >> 2U;
+
+ /* Process 4 input samples per iteration.
+ ** a second loop below handles the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Square 4 samples and accumulate them into the per-lane partial sums in sumV. */
+ inV = vld1q_f32(pSrc);
+ sumV = vmlaq_f32(sumV, inV, inV);
+ pSrc += 4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+ sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV)); /* pairwise add: 4 lanes -> 2 */
+ sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1); /* lane intrinsics instead of the non-portable [] vector subscript */
+
+ /* If the blockSize is not a multiple of 4, add the squares of the remaining samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Square one sample and add it to the scalar accumulator, sum. */
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the result to the destination (0.0f when blockSize is 0) */
+ *pResult = sum;
+}
+#else
+void arm_power_f32(
+ const float32_t * pSrc, /* input vector, only read */
+ uint32_t blockSize, /* number of samples to square and sum */
+ float32_t * pResult) /* receives the sum of squares */
+{
+ uint32_t blkCnt; /* Loop counter */
+ float32_t sum = 0.0f; /* Running sum of squares */
+ float32_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: process 4 input samples per iteration */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Square each of the next 4 samples and add it to sum, in input order. */
+ in = *pSrc++;
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: the 0 to 3 samples left over are handled by the loop below */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* No unrolling: the loop below walks all blockSize samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Square one sample and add it to sum. */
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store result to destination (0.0f when blockSize is 0) */
+ *pResult = sum;
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+
+/**
+ @} end of power group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c
index 2c47f0b..62e567f 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c
@@ -1,177 +1,132 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_power_q15.c
- * Description: Sum of the squares of the elements of a Q15 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup power
- @{
- */
-
-/**
- @brief Sum of the squares of the elements of a Q15 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult sum of the squares value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using a 64-bit internal accumulator.
- The input is represented in 1.15 format.
- Intermediate multiplication yields a 2.30 format, and this
- result is added without saturation to a 64-bit accumulator in 34.30 format.
- With 33 guard bits in the accumulator, there is no risk of overflow, and the
- full precision of the intermediate multiplication is preserved.
- Finally, the return result is in 34.30 format.
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-void arm_power_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q63_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- q15x8_t vecSrc;
- q63_t sum = 0LL;
- q15_t in;
-
- /* Compute 8 outputs at a time */
- blkCnt = blockSize >> 3U;
- while (blkCnt > 0U)
- {
- vecSrc = vldrhq_s16(pSrc);
- /*
- * sum lanes
- */
- sum = vmlaldavaq(sum, vecSrc, vecSrc);
-
- blkCnt --;
- pSrc += 8;
- }
-
- /*
- * tail
- */
- blkCnt = blockSize & 0x7;
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
- in = *pSrc++;
- sum += ((q31_t) in * in);
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- *pResult = sum;
-}
-#else
-void arm_power_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q63_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q63_t sum = 0; /* Temporary result storage */
- q15_t in; /* Temporary variable to store input value */
-
-#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
- q31_t in32; /* Temporary variable to store packed input value */
-#endif
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
-#if defined (ARM_MATH_DSP)
- in32 = read_q15x2_ia (&pSrc);
- sum = __SMLALD(in32, in32, sum);
-
- in32 = read_q15x2_ia (&pSrc);
- sum = __SMLALD(in32, in32, sum);
-#else
- in = *pSrc++;
- sum += ((q31_t) in * in);
-
- in = *pSrc++;
- sum += ((q31_t) in * in);
-
- in = *pSrc++;
- sum += ((q31_t) in * in);
-
- in = *pSrc++;
- sum += ((q31_t) in * in);
-#endif /* #if defined (ARM_MATH_DSP) */
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
- in = *pSrc++;
- sum += ((q31_t) in * in);
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store result in 34.30 format */
- *pResult = sum;
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of power group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_q15.c
+ * Description: Sum of the squares of the elements of a Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup power
+ @{
+ */
+
+/**
+ @brief Sum of the squares of the elements of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult sum of the squares value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.15 format.
+ Intermediate multiplication yields a 2.30 format, and this
+ result is added without saturation to a 64-bit accumulator in 34.30 format.
+ With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ full precision of the intermediate multiplication is preserved.
+ Finally, the return result is in 34.30 format.
+ */
+
+void arm_power_q15(
+ const q15_t * pSrc, /* input vector, only read */
+ uint32_t blockSize, /* number of samples to square and sum */
+ q63_t * pResult) /* receives the sum of squares */
+{
+ uint32_t blkCnt; /* Loop counter */
+ q63_t sum = 0; /* Running sum of squares */
+ q15_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
+ q31_t in32; /* Temporary variable to store packed input value */
+#endif
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: process 4 input samples per iteration */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* DSP path: presumably each read packs 2 samples and __SMLALD adds both squares to sum, mirroring the 4 scalar steps below - TODO confirm. */
+#if defined (ARM_MATH_DSP)
+ in32 = read_q15x2_ia ((q15_t **) &pSrc); /* NOTE(review): cast drops const; presumably the helper takes q15_t ** - confirm */
+ sum = __SMLALD(in32, in32, sum);
+
+ in32 = read_q15x2_ia ((q15_t **) &pSrc); /* second pair of samples */
+ sum = __SMLALD(in32, in32, sum);
+#else
+ in = *pSrc++;
+ sum += ((q31_t) in * in); /* widen before multiplying, then accumulate */
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: the 0 to 3 samples left over are handled by the loop below */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* No unrolling: the loop below walks all blockSize samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Square one sample (widened to q31_t first) and add it to sum. */
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the unshifted accumulator (34.30 format); 0 when blockSize is 0 */
+ *pResult = sum;
+}
+
+/**
+ @} end of power group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c
index a39b3a7..aa51d0f 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c
@@ -1,165 +1,121 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_power_q31.c
- * Description: Sum of the squares of the elements of a Q31 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup power
- @{
- */
-
-/**
- @brief Sum of the squares of the elements of a Q31 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult sum of the squares value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using a 64-bit internal accumulator.
- The input is represented in 1.31 format.
- Intermediate multiplication yields a 2.62 format, and this
- result is truncated to 2.48 format by discarding the lower 14 bits.
- The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
- With 15 guard bits in the accumulator, there is no risk of overflow, and the
- full precision of the intermediate multiplication is preserved.
- Finally, the return result is in 16.48 format.
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_power_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q63_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- q31x4_t vecSrc;
- q63_t sum = 0LL;
- q31_t in;
-
- /* Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
- while (blkCnt > 0U)
- {
- vecSrc = vldrwq_s32(pSrc);
- /*
- * sum lanes
- */
- sum = vrmlaldavhaq(sum, vecSrc, vecSrc);
-
- blkCnt --;
- pSrc += 4;
- }
-
- /*
- * tail
- */
- blkCnt = blockSize & 0x3;
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
- in = *pSrc++;
- sum += ((q63_t) in * in) >> 8;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- *pResult = asrl(sum, 6);
-}
-#else
-void arm_power_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q63_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q63_t sum = 0; /* Temporary result storage */
- q31_t in; /* Temporary variable to store input value */
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and store result in a temporary variable sum, providing 15 guard bits. */
- in = *pSrc++;
- sum += ((q63_t) in * in) >> 14U;
-
- in = *pSrc++;
- sum += ((q63_t) in * in) >> 14U;
-
- in = *pSrc++;
- sum += ((q63_t) in * in) >> 14U;
-
- in = *pSrc++;
- sum += ((q63_t) in * in) >> 14U;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
- in = *pSrc++;
- sum += ((q63_t) in * in) >> 14U;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store results in 16.48 format */
- *pResult = sum;
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of power group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_q31.c
+ * Description: Sum of the squares of the elements of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup power
+ @{
+ */
+
+/**
+ @brief Sum of the squares of the elements of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult sum of the squares value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.31 format.
+ Intermediate multiplication yields a 2.62 format, and this
+ result is truncated to 2.48 format by discarding the lower 14 bits.
+ The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
+ With 15 guard bits in the accumulator, overflow cannot occur for block sizes below 2^15 samples; the
+ 14 bits discarded from each product are lost, so the intermediate multiplication is not kept at full precision.
+ Finally, the return result is in 16.48 format.
+ */
+
+void arm_power_q31(
+ const q31_t * pSrc, /* input vector, only read */
+ uint32_t blockSize, /* number of samples to square and sum */
+ q63_t * pResult) /* receives the sum of squares */
+{
+ uint32_t blkCnt; /* Loop counter */
+ q63_t sum = 0; /* Running sum of shifted squares */
+ q31_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: process 4 input samples per iteration */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Square each sample in 64 bits, shift the product right by 14 bits (16.48 format) and add it to sum. */
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: the 0 to 3 samples left over are handled by the loop below */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* No unrolling: the loop below walks all blockSize samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Same step as the unrolled loop: 64-bit square, shift right by 14 bits, accumulate. */
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store results in 16.48 format (0 when blockSize is 0) */
+ *pResult = sum;
+}
+
+/**
+ @} end of power group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c
index 0545f7c..eefa8ba 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c
@@ -1,180 +1,136 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_power_q7.c
- * Description: Sum of the squares of the elements of a Q7 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup power
- @{
- */
-
-/**
- @brief Sum of the squares of the elements of a Q7 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult sum of the squares value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using a 32-bit internal accumulator.
- The input is represented in 1.7 format.
- Intermediate multiplication yields a 2.14 format, and this
- result is added without saturation to an accumulator in 18.14 format.
- With 17 guard bits in the accumulator, there is no risk of overflow, and the
- full precision of the intermediate multiplication is preserved.
- Finally, the return result is in 18.14 format.
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_power_q7(
- const q7_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- q7x16_t vecSrc;
- q31_t sum = 0LL;
- q7_t in;
-
- /* Compute 16 outputs at a time */
- blkCnt = blockSize >> 4U;
- while (blkCnt > 0U)
- {
- vecSrc = vldrbq_s8(pSrc);
- /*
- * sum lanes
- */
- sum = vmladavaq(sum, vecSrc, vecSrc);
-
- blkCnt--;
- pSrc += 16;
- }
-
- /*
- * tail
- */
- blkCnt = blockSize & 0xF;
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
- in = *pSrc++;
- sum += ((q15_t) in * in);
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- *pResult = sum;
-}
-#else
-void arm_power_q7(
- const q7_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q31_t sum = 0; /* Temporary result storage */
- q7_t in; /* Temporary variable to store input value */
-
-#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
- q31_t in32; /* Temporary variable to store packed input value */
- q31_t in1, in2; /* Temporary variables to store input value */
-#endif
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
-#if defined (ARM_MATH_DSP)
- in32 = read_q7x4_ia (&pSrc);
-
- in1 = __SXTB16(__ROR(in32, 8));
- in2 = __SXTB16(in32);
-
- /* calculate power and accumulate to accumulator */
- sum = __SMLAD(in1, in1, sum);
- sum = __SMLAD(in2, in2, sum);
-#else
- in = *pSrc++;
- sum += ((q15_t) in * in);
-
- in = *pSrc++;
- sum += ((q15_t) in * in);
-
- in = *pSrc++;
- sum += ((q15_t) in * in);
-
- in = *pSrc++;
- sum += ((q15_t) in * in);
-#endif /* #if defined (ARM_MATH_DSP) */
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute Power and store result in a temporary variable, sum. */
- in = *pSrc++;
- sum += ((q15_t) in * in);
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Store result in 18.14 format */
- *pResult = sum;
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of power group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_q7.c
+ * Description: Sum of the squares of the elements of a Q7 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup power
+ @{
+ */
+
+/**
+ @brief Sum of the squares of the elements of a Q7 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult sum of the squares value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 32-bit internal accumulator.
+ The input is represented in 1.7 format.
+ Intermediate multiplication yields a 2.14 format, and this
+ result is added without saturation to an accumulator in 18.14 format.
+ With 17 guard bits in the accumulator, overflow cannot occur for block sizes below 2^17 samples, and the
+ full precision of the intermediate multiplication is preserved.
+ Finally, the return result is in 18.14 format.
+ */
+
+void arm_power_q7(
+ const q7_t * pSrc, /* input vector, only read */
+ uint32_t blockSize, /* number of samples to square and sum */
+ q31_t * pResult) /* receives the sum of squares */
+{
+ uint32_t blkCnt; /* Loop counter */
+ q31_t sum = 0; /* Running sum of squares */
+ q7_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
+ q31_t in32; /* Temporary variable to store packed input value */
+ q31_t in1, in2; /* Packed input split into two words for __SMLAD */
+#endif
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: process 4 input samples per iteration */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* DSP path: one packed read replaces the 4 scalar steps of the #else branch. */
+#if defined (ARM_MATH_DSP)
+ in32 = read_q7x4_ia ((q7_t **) &pSrc); /* NOTE(review): cast drops const; presumably the helper takes q7_t ** - confirm */
+
+ in1 = __SXTB16(__ROR(in32, 8)); /* presumably bytes 1 and 3, sign-extended into two 16-bit lanes - TODO confirm */
+ in2 = __SXTB16(in32); /* presumably bytes 0 and 2, sign-extended likewise - TODO confirm */
+
+ /* square both 16-bit lanes of each word and add the products to sum */
+ sum = __SMLAD(in1, in1, sum);
+ sum = __SMLAD(in2, in2, sum);
+#else
+ in = *pSrc++;
+ sum += ((q15_t) in * in); /* widen before multiplying, then accumulate */
+
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: the 0 to 3 samples left over are handled by the loop below */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* No unrolling: the loop below walks all blockSize samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Square one sample (widened to q15_t first) and add it to sum. */
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the unshifted accumulator (18.14 format); 0 when blockSize is 0 */
+ *pResult = sum;
+}
+
+/**
+ @} end of power group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c
index cb45752..4f316fb 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c
@@ -1,192 +1,176 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_rms_f32.c
- * Description: Root mean square value of the elements of a floating-point vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @defgroup RMS Root mean square (RMS)
-
- Calculates the Root Mean Square of the elements in the input vector.
- The underlying algorithm is used:
-
- <pre>
- Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
- </pre>
-
- There are separate functions for floating point, Q31, and Q15 data types.
- */
-
-/**
- @addtogroup RMS
- @{
- */
-
-/**
- @brief Root Mean Square of the elements of a floating-point vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult root mean square value returned here
- @return none
- */
-
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_rms_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- float32_t pow = 0.0f;
-
- arm_power_f32(pSrc, blockSize, &pow);
-
- /* Compute Rms and store the result in the destination */
- arm_sqrt_f32(pow / (float32_t) blockSize, pResult);
-}
-#else
-#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_rms_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- float32_t sum = 0.0f; /* accumulator */
- float32_t in; /* Temporary variable to store input value */
- uint32_t blkCnt; /* loop counter */
-
- float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
- float32x2_t sumV2;
- float32x4_t inV;
-
- blkCnt = blockSize >> 2U;
-
- /* Compute 4 outputs at a time.
- ** a second loop below computes the remaining 1 to 3 samples. */
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
- /* Compute Power and then store the result in a temporary variable, sum. */
- inV = vld1q_f32(pSrc);
- sumV = vmlaq_f32(sumV, inV, inV);
- pSrc += 4;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
- sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
-
- /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
- ** No loop unrolling is used. */
- blkCnt = blockSize % 0x4U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
- /* compute power and then store the result in a temporary variable, sum. */
- in = *pSrc++;
- sum += in * in;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* Compute Rms and store the result in the destination */
- arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
-}
-#else
-void arm_rms_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- float32_t sum = 0.0f; /* Temporary result storage */
- float32_t in; /* Temporary variable to store input value */
-
-#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- in = *pSrc++;
- /* Compute sum of squares and store result in a temporary variable, sum. */
- sum += in * in;
-
- in = *pSrc++;
- sum += in * in;
-
- in = *pSrc++;
- sum += in * in;
-
- in = *pSrc++;
- sum += in * in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- in = *pSrc++;
- /* Compute sum of squares and store result in a temporary variable. */
- sum += ( in * in);
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Compute Rms and store result in destination */
- arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
-}
-#endif /* #if defined(ARM_MATH_NEON) */
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-/**
- @} end of RMS group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rms_f32.c
+ * Description: Root mean square value of the elements of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup RMS Root mean square (RMS)
+
+ Calculates the Root Mean Square of the elements in the input vector.
+ The following algorithm is used:
+
+ <pre>
+ Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
+ </pre>
+
+ There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ @addtogroup RMS
+ @{
+ */
+
+/**
+ @brief Root Mean Square of the elements of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult root mean square value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON)
+void arm_rms_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* Scalar sum of squares */
+ float32_t in; /* Current input sample (used by the tail loop) */
+ uint32_t blkCnt; /* Loop counter */
+
+ float32x4_t sumV = vdupq_n_f32(0.0f); /* Four partial sums of squares, one per lane */
+ float32x2_t sumV2; /* Pairwise-reduced partial sums */
+ float32x4_t inV; /* Four input samples */
+
+ blkCnt = blockSize >> 2U;
+
+ /* Process 4 samples per iteration.
+ ** A second loop below handles the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Square four samples and accumulate them lane-wise into sumV. */
+ inV = vld1q_f32(pSrc);
+ sumV = vmlaq_f32(sumV, inV, inV);
+ pSrc += 4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+ /* Reduce the four lanes to one scalar; lanes are read with vget_lane_f32 because vector subscripting is a compiler extension. */
+ sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
+ sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
+
+ /* If blockSize is not a multiple of 4, add the squares of the leftover samples here.
+ ** No vectorization or unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Square one sample and add it to the scalar sum. */
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* RMS = sqrt(sum / blockSize); result goes to pResult */
+ arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
+}
+#else
+void arm_rms_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ float32_t sum = 0.0f; /* Running sum of squared samples */
+ float32_t in; /* Current input sample */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: process 4 samples per iteration */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Square the sample and add it to the running sum. */
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: 0 to 3 samples remain for the loop below */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* No unrolling: the loop below processes every sample */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Square the sample and add it to the running sum. */
+ sum += ( in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* RMS = sqrt(sum / blockSize); result goes to pResult */
+ arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+
+/**
+ @} end of RMS group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c
index 1df17b1..c8a0a13 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c
@@ -1,149 +1,134 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_rms_q15.c
- * Description: Root Mean Square of the elements of a Q15 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup RMS
- @{
- */
-
-/**
- @brief Root Mean Square of the elements of a Q15 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult root mean square value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using a 64-bit internal accumulator.
- The input is represented in 1.15 format.
- Intermediate multiplication yields a 2.30 format, and this
- result is added without saturation to a 64-bit accumulator in 34.30 format.
- With 33 guard bits in the accumulator, there is no risk of overflow, and the
- full precision of the intermediate multiplication is preserved.
- Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
- 15 bits, and then saturated to yield a result in 1.15 format.
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_rms_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult)
-{
- q63_t pow = 0.0f;
- q15_t normalizedPower;
-
- arm_power_q15(pSrc, blockSize, &pow);
-
- normalizedPower=__SSAT((pow / (q63_t) blockSize) >> 15,16);
- arm_sqrt_q15(normalizedPower, pResult);
-}
-#else
-void arm_rms_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q63_t sum = 0; /* Temporary result storage */
- q15_t in; /* Temporary variable to store input value */
-
-#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
- q31_t in32; /* Temporary variable to store input value */
-#endif
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- /* Compute sum of squares and store result in a temporary variable. */
-#if defined (ARM_MATH_DSP)
- in32 = read_q15x2_ia (&pSrc);
- sum = __SMLALD(in32, in32, sum);
-
- in32 = read_q15x2_ia (&pSrc);
- sum = __SMLALD(in32, in32, sum);
-#else
- in = *pSrc++;
- sum += ((q31_t) in * in);
-
- in = *pSrc++;
- sum += ((q31_t) in * in);
-
- in = *pSrc++;
- sum += ((q31_t) in * in);
-
- in = *pSrc++;
- sum += ((q31_t) in * in);
-#endif /* #if defined (ARM_MATH_DSP) */
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- in = *pSrc++;
- /* Compute sum of squares and store result in a temporary variable. */
- sum += ((q31_t) in * in);
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Truncating and saturating the accumulator to 1.15 format */
- /* Store result in destination */
- arm_sqrt_q15(__SSAT((sum / (q63_t)blockSize) >> 15, 16), pResult);
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of RMS group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rms_q15.c
+ * Description: Root Mean Square of the elements of a Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup RMS
+ @{
+ */
+
+/**
+ @brief Root Mean Square of the elements of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult root mean square value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.15 format.
+ Intermediate multiplication yields a 2.30 format, and this
+ result is added without saturation to a 64-bit accumulator in 34.30 format.
+ With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ full precision of the intermediate multiplication is preserved.
+ Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ 15 bits, and then saturated to yield a result in 1.15 format.
+ */
+
+void arm_rms_q15(
+ const q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q63_t sum = 0; /* 64-bit running sum of squared samples */
+ q15_t in; /* Current input sample */
+
+#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
+ q31_t in32; /* Value returned by read_q15x2_ia (two reads cover the 4 samples of one iteration) */
+#endif
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: process 4 samples per iteration */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Add the squares of the next four samples to sum. */
+#if defined (ARM_MATH_DSP)
+ in32 = read_q15x2_ia ((q15_t **) &pSrc);
+ sum = __SMLALD(in32, in32, sum);
+
+ in32 = read_q15x2_ia ((q15_t **) &pSrc);
+ sum = __SMLALD(in32, in32, sum);
+#else
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: 0 to 3 samples remain for the loop below */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* No unrolling: the loop below processes every sample */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Widen to q31_t before multiplying, then add the square to sum. */
+ sum += ((q31_t) in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Divide by blockSize, drop the low 15 bits and saturate to 16 bits (1.15 format) */
+ /* Take the square root and store it. NOTE(review): blockSize == 0 divides by zero here; callers must pass blockSize > 0. */
+ arm_sqrt_q15(__SSAT((sum / (q63_t)blockSize) >> 15, 16), pResult);
+}
+
+/**
+ @} end of RMS group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c
index ba39b7b..4cf086f 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c
@@ -1,141 +1,124 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_rms_q31.c
- * Description: Root Mean Square of the elements of a Q31 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup RMS
- @{
- */
-
-/**
- @brief Root Mean Square of the elements of a Q31 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult root mean square value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using an internal 64-bit accumulator.
- The input is represented in 1.31 format, and intermediate multiplication
- yields a 2.62 format.
- The accumulator maintains full precision of the intermediate multiplication results,
- but provides only a single guard bit.
- There is no saturation on intermediate additions.
- If the accumulator overflows, it wraps around and distorts the result.
- In order to avoid overflows completely, the input signal must be scaled down by
- log2(blockSize) bits, as a total of blockSize additions are performed internally.
- Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-void arm_rms_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult)
-{
- q63_t pow = 0.0f;
- q31_t normalizedPower;
- arm_power_q31(pSrc, blockSize, &pow);
-
- normalizedPower=clip_q63_to_q31((pow / (q63_t) blockSize) >> 17);
- arm_sqrt_q31(normalizedPower, pResult);
-
-}
-
-#else
-void arm_rms_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- uint64_t sum = 0; /* Temporary result storage (can get never negative. changed type from q63 to uint64 */
- q31_t in; /* Temporary variable to store input value */
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- in = *pSrc++;
- /* Compute sum of squares and store result in a temporary variable, sum. */
- sum += ((q63_t) in * in);
-
- in = *pSrc++;
- sum += ((q63_t) in * in);
-
- in = *pSrc++;
- sum += ((q63_t) in * in);
-
- in = *pSrc++;
- sum += ((q63_t) in * in);
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
-
- in = *pSrc++;
- /* Compute sum of squares and store result in a temporary variable. */
- sum += ((q63_t) in * in);
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Convert data in 2.62 to 1.31 by 31 right shifts and saturate */
- /* Compute Rms and store result in destination vector */
- arm_sqrt_q31(clip_q63_to_q31((sum / (q63_t) blockSize) >> 31), pResult);
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of RMS group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rms_q31.c
+ * Description: Root Mean Square of the elements of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup RMS
+ @{
+ */
+
+/**
+ @brief Root Mean Square of the elements of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult root mean square value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using an internal 64-bit accumulator.
+ The input is represented in 1.31 format, and intermediate multiplication
+ yields a 2.62 format.
+ The accumulator maintains full precision of the intermediate multiplication results,
+ but provides only a single guard bit.
+ There is no saturation on intermediate additions.
+ If the accumulator overflows, it wraps around and distorts the result.
+ In order to avoid overflows completely, the input signal must be scaled down by
+ log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
+ */
+
+void arm_rms_q31(
+ const q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ uint64_t sum = 0; /* Running sum of squares (can never be negative, so uint64_t is used instead of q63_t) */
+ q31_t in; /* Current input sample */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: process 4 samples per iteration */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Widen to q63_t before multiplying, then add the square to sum. */
+ sum += ((q63_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: 0 to 3 samples remain for the loop below */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* No unrolling: the loop below processes every sample */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Widen to q63_t before multiplying, then add the square to sum. */
+ sum += ((q63_t) in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Divide by blockSize, convert 2.62 to 1.31 with a 31-bit right shift, and saturate */
+ /* Take the square root and store it. NOTE(review): blockSize == 0 divides by zero here; callers must pass blockSize > 0. */
+ arm_sqrt_q31(clip_q63_to_q31((sum / (q63_t) blockSize) >> 31), pResult);
+}
+
+/**
+ @} end of RMS group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c
index 682443d..214a794 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c
@@ -1,83 +1,188 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_std_f32.c
- * Description: Standard deviation of the elements of a floating-point vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @defgroup STD Standard deviation
-
- Calculates the standard deviation of the elements in the input vector.
-
- The float implementation is relying on arm_var_f32 which is using a two-pass algorithm
- to avoid problem of numerical instabilities and cancellation errors.
-
- Fixed point versions are using the standard textbook algorithm since the fixed point
- numerical behavior is different from the float one.
-
- Algorithm for fixed point versions is summarized below:
-
-
- <pre>
- Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))
-
- sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
- sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
- </pre>
-
- There are separate functions for floating point, Q31, and Q15 data types.
- */
-
-/**
- @addtogroup STD
- @{
- */
-
-/**
- @brief Standard deviation of the elements of a floating-point vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult standard deviation value returned here
- @return none
- */
-void arm_std_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- float32_t var;
- arm_var_f32(pSrc,blockSize,&var);
- arm_sqrt_f32(var, pResult);
-}
-
-/**
- @} end of STD group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_std_f32.c
+ * Description: Standard deviation of the elements of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup STD Standard deviation
+
+ Calculates the standard deviation of the elements in the input vector.
+ The following algorithm is used:
+
+ <pre>
+ Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))
+
+ sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
+ sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
+ </pre>
+
+ There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ @addtogroup STD
+ @{
+ */
+
+/**
+ @brief Standard deviation of the elements of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult standard deviation value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON_EXPERIMENTAL)
+void arm_std_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t var; /* Filled in by arm_var_f32 below */
+ arm_var_f32(pSrc,blockSize,&var); /* Delegate the accumulation to arm_var_f32 */
+ arm_sqrt_f32(var, pResult); /* Result is the square root of that value */
+}
+#else
+void arm_std_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ float32_t sum = 0.0f; /* Running sum of the samples */
+ float32_t sumOfSquares = 0.0f; /* Running sum of the squared samples */
+ float32_t in; /* Current input sample */
+
+#ifndef ARM_MATH_CM0_FAMILY
+ float32_t meanOfSquares, mean, squareOfMean; /* Intermediate terms of the final formula */
+#else
+ float32_t squareOfSum; /* Square of sum, divided by blockSize */
+ float32_t var; /* Temporary variance storage */
+#endif
+ /* Fewer than two samples: the (blockSize - 1) divisor used below would be zero, so return 0 */
+ if (blockSize <= 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: process 4 samples per iteration */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Add the squared sample to sumOfSquares. */
+ sumOfSquares += in * in;
+ /* Add the sample itself to sum. */
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += in * in;
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += in * in;
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += in * in;
+ sum += in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: 0 to 3 samples remain for the loop below */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* No unrolling: the loop below processes every sample */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Add the squared sample to sumOfSquares. */
+ sumOfSquares += ( in * in);
+ /* Add the sample itself to sum. */
+ sum += in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+#ifndef ARM_MATH_CM0_FAMILY
+
+ /* Sum of squares divided by (blockSize - 1), stored in meanOfSquares. */
+ meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f);
+
+ /* Compute mean of all input values */
+ mean = sum / (float32_t) blockSize;
+
+ /* Square of the mean, rescaled by blockSize / (blockSize - 1) */
+ squareOfMean = (mean * mean) * (((float32_t) blockSize) /
+ ((float32_t) blockSize - 1.0f));
+
+ /* Standard deviation = sqrt(meanOfSquares - squareOfMean); result goes to pResult */
+ arm_sqrt_f32((meanOfSquares - squareOfMean), pResult);
+
+#else
+ /* Path compiled when ARM_MATH_CM0_FAMILY is defined (Cortex-M0) */
+
+ /* Square of sum, divided by blockSize */
+ squareOfSum = ((sum * sum) / (float32_t) blockSize);
+
+ /* Variance = (sumOfSquares - squareOfSum) / (blockSize - 1) */
+ var = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
+
+ /* Standard deviation = sqrt(var); result goes to pResult */
+ arm_sqrt_f32(var, pResult);
+
+#endif /* #ifndef ARM_MATH_CM0_FAMILY */
+
+}
+#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
+
+/**
+ @} end of STD group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c
index 88e273a..70d142d 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c
@@ -1,173 +1,161 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_std_q15.c
- * Description: Standard deviation of an array of Q15 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup STD
- @{
- */
-
-/**
- @brief Standard deviation of the elements of a Q15 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult standard deviation value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using a 64-bit internal accumulator.
- The input is represented in 1.15 format.
- Intermediate multiplication yields a 2.30 format, and this
- result is added without saturation to a 64-bit accumulator in 34.30 format.
- With 33 guard bits in the accumulator, there is no risk of overflow, and the
- full precision of the intermediate multiplication is preserved.
- Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
- 15 bits, and then saturated to yield a result in 1.15 format.
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_std_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult)
-{
- q15_t var=0;
-
- arm_var_q15(pSrc, blockSize, &var);
- arm_sqrt_q15(var,pResult);
-}
-#else
-void arm_std_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q31_t sum = 0; /* Accumulator */
- q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
- q63_t sumOfSquares = 0; /* Sum of squares */
- q15_t in; /* Temporary variable to store input value */
-
-#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
- q31_t in32; /* Temporary variable to store input value */
-#endif
-
- if (blockSize <= 1U)
- {
- *pResult = 0;
- return;
- }
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
- /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
- /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
- /* Compute sum and store result in a temporary variable, sum. */
-#if defined (ARM_MATH_DSP)
- in32 = read_q15x2_ia (&pSrc);
- sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
- sum += ((in32 << 16U) >> 16U);
- sum += (in32 >> 16U);
-
- in32 = read_q15x2_ia (&pSrc);
- sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
- sum += ((in32 << 16U) >> 16U);
- sum += (in32 >> 16U);
-#else
- in = *pSrc++;
- sumOfSquares += (in * in);
- sum += in;
-
- in = *pSrc++;
- sumOfSquares += (in * in);
- sum += in;
-
- in = *pSrc++;
- sumOfSquares += (in * in);
- sum += in;
-
- in = *pSrc++;
- sumOfSquares += (in * in);
- sum += in;
-#endif /* #if defined (ARM_MATH_DSP) */
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
- /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
- in = *pSrc++;
- /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
- sumOfSquares += (in * in);
- /* Compute sum and store result in a temporary variable, sum. */
- sum += in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
- meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U));
-
- /* Compute square of mean */
- squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
-
- /* mean of squares minus the square of mean. */
- /* Compute standard deviation and store result in destination */
- arm_sqrt_q15(__SSAT((meanOfSquares - squareOfMean) >> 15U, 16U), pResult);
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of STD group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_std_q15.c
+ * Description: Standard deviation of an array of Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup STD
+ @{
+ */
+
+/**
+ @brief Standard deviation of the elements of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult standard deviation value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.15 format.
+ Intermediate multiplication yields a 2.30 format, and this
+ result is added without saturation to a 64-bit accumulator in 34.30 format.
+ With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ full precision of the intermediate multiplication is preserved.
+ Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ 15 bits, and then saturated to yield a result in 1.15 format.
+ */
+
+void arm_std_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+  uint32_t blkCnt;                    /* Loop counter */
+  q31_t sum = 0;                      /* Sum of samples; 32 bits wide, so it cannot overflow for blockSize <= 65536 */
+  q31_t meanOfSquares, squareOfMean;  /* sumOfSquares / (N - 1) and sum^2 / (N * (N - 1)) */
+  q63_t sumOfSquares = 0;             /* Sum of squared samples */
+  q15_t in;                           /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
+  q31_t in32;                         /* Two packed Q15 samples read as one 32-bit word */
+#endif
+
+  /* The (N - 1) divisor below needs at least two samples; report 0 otherwise. */
+  if (blockSize <= 1U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+    /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+    /* Compute sum and store result in a temporary variable, sum. */
+#if defined (ARM_MATH_DSP)
+    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
+    /* Add the low halfword (sign-extended by the shift pair), then the high halfword. */
+    sum += ((in32 << 16U) >> 16U);
+    sum += (in32 >> 16U);
+
+    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
+    sum += ((in32 << 16U) >> 16U);
+    sum += (in32 >> 16U);
+#else
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+    sum += in;
+
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+    sum += in;
+
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+    sum += in;
+
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+    sum += in;
+#endif /* #if defined (ARM_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+    in = *pSrc++;
+    /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+    sumOfSquares += (in * in);
+    /* Compute sum and store result in a temporary variable, sum. */
+    sum += in;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
+  meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U));
+
+  /* Compute square of mean.
+     The divisor N * (N - 1) is formed in 64 bits: as a 32-bit unsigned product
+     it wraps around once blockSize exceeds 65536 and yields a wrong quotient. */
+  squareOfMean = (q31_t) ((q63_t) sum * sum / ((q63_t) blockSize * (q63_t) (blockSize - 1U)));
+
+  /* mean of squares minus the square of mean. */
+  /* Compute standard deviation and store result in destination */
+  arm_sqrt_q15(__SSAT((meanOfSquares - squareOfMean) >> 15U, 16U), pResult);
+}
+
+/**
+ @} end of STD group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c
index 63170e7..1452a69 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c
@@ -1,159 +1,147 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_std_q31.c
- * Description: Standard deviation of the elements of a Q31 vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup STD
- @{
- */
-
-/**
- @brief Standard deviation of the elements of a Q31 vector.
- @param[in] pSrc points to the input vector.
- @param[in] blockSize number of samples in input vector.
- @param[out] pResult standard deviation value returned here.
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using an internal 64-bit accumulator.
- The input is represented in 1.31 format, which is then downshifted by 8 bits
- which yields 1.23, and intermediate multiplication yields a 2.46 format.
- The accumulator maintains full precision of the intermediate multiplication results,
- but provides only a 16 guard bits.
- There is no saturation on intermediate additions.
- If the accumulator overflows it wraps around and distorts the result.
- In order to avoid overflows completely the input signal must be scaled down by
- log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
- After division, internal variables should be Q18.46
- Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_std_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult)
-{
- q31_t var=0;
-
- arm_var_q31(pSrc, blockSize, &var);
- arm_sqrt_q31(var, pResult);
-}
-#else
-void arm_std_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q63_t sum = 0; /* Accumulator */
- q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
- q63_t sumOfSquares = 0; /* Sum of squares */
- q31_t in; /* Temporary variable to store input value */
-
- if (blockSize <= 1U)
- {
- *pResult = 0;
- return;
- }
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
- /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
- in = *pSrc++ >> 8U;
- /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
- sumOfSquares += ((q63_t) (in) * (in));
- /* Compute sum and store result in a temporary variable, sum. */
- sum += in;
-
- in = *pSrc++ >> 8U;
- sumOfSquares += ((q63_t) (in) * (in));
- sum += in;
-
- in = *pSrc++ >> 8U;
- sumOfSquares += ((q63_t) (in) * (in));
- sum += in;
-
- in = *pSrc++ >> 8U;
- sumOfSquares += ((q63_t) (in) * (in));
- sum += in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
- /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
- in = *pSrc++ >> 8U;
- /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
- sumOfSquares += ((q63_t) (in) * (in));
- /* Compute sum and store result in a temporary variable, sum. */
- sum += in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
- meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U));
-
- /* Compute square of mean */
- squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
-
- /* Compute standard deviation and store result in destination */
- arm_sqrt_q31((meanOfSquares - squareOfMean) >> 15U, pResult);
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of STD group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_std_q31.c
+ * Description: Standard deviation of the elements of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup STD
+ @{
+ */
+
+/**
+ @brief Standard deviation of the elements of a Q31 vector.
+ @param[in] pSrc points to the input vector.
+ @param[in] blockSize number of samples in input vector.
+ @param[out] pResult standard deviation value returned here.
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using an internal 64-bit accumulator.
+ The input is represented in 1.31 format, which is then downshifted by 8 bits
+ which yields 1.23, and intermediate multiplication yields a 2.46 format.
+ The accumulator maintains full precision of the intermediate multiplication results,
+ but provides only 16 guard bits.
+ There is no saturation on intermediate additions.
+ If the accumulator overflows it wraps around and distorts the result.
+ In order to avoid overflows completely the input signal must be scaled down by
+ log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
+ After division, internal variables should be Q18.46
+ Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
+ */
+
+void arm_std_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+  uint32_t blkCnt;                    /* Loop counter */
+  q63_t sum = 0;                      /* Sum of the down-shifted samples */
+  q63_t meanOfSquares, squareOfMean;  /* sumOfSquares / (N - 1) and sum^2 / (N * (N - 1)) */
+  q63_t sumOfSquares = 0;             /* Sum of squared down-shifted samples */
+  q31_t in;                           /* Current sample after the 8-bit down-shift */
+
+  /* The (N - 1) divisor below needs at least two samples; report 0 otherwise. */
+  if (blockSize <= 1U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+    /* Each sample is shifted right by 8 bits before it is squared and summed. */
+    in = *pSrc++ >> 8U;
+    /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+    sumOfSquares += ((q63_t) (in) * (in));
+    /* Compute sum and store result in a temporary variable, sum. */
+    sum += in;
+
+    in = *pSrc++ >> 8U;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    in = *pSrc++ >> 8U;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    in = *pSrc++ >> 8U;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+    in = *pSrc++ >> 8U;
+    /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+    sumOfSquares += ((q63_t) (in) * (in));
+    /* Compute sum and store result in a temporary variable, sum. */
+    sum += in;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
+  meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U));
+
+  /* Compute square of mean.
+     The divisor N * (N - 1) is formed in 64 bits: as a 32-bit unsigned product
+     it wraps around once blockSize exceeds 65536 and yields a wrong quotient. */
+  squareOfMean = (sum * sum / ((q63_t) blockSize * (q63_t) (blockSize - 1U)));
+
+  /* Compute standard deviation and store result in destination */
+  arm_sqrt_q31((meanOfSquares - squareOfMean) >> 15U, pResult);
+}
+
+/**
+ @} end of STD group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c
index 7ff344c..18f5806 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c
@@ -1,293 +1,234 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_var_f32.c
- * Description: Variance of the elements of a floating-point vector
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @defgroup variance Variance
-
- Calculates the variance of the elements in the input vector.
- The underlying algorithm used is the direct method sometimes referred to as the two-pass method:
-
- <pre>
- Result = sum(element - meanOfElements)^2) / numElement - 1
-
- meanOfElements = ( pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize
- </pre>
-
- There are separate functions for floating point, Q31, and Q15 data types.
- */
-
-/**
- @addtogroup variance
- @{
- */
-
-/**
- @brief Variance of the elements of a floating-point vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult variance value returned here
- @return none
- */
-#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
-
-#include "arm_helium_utils.h"
-
-void arm_var_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- f32x4_t vecSrc;
- f32x4_t sumVec = vdupq_n_f32(0.0f);
- float32_t fMean;
- float32_t sum = 0.0f; /* accumulator */
- float32_t in; /* Temporary variable to store input value */
-
- if (blockSize <= 1U) {
- *pResult = 0;
- return;
- }
-
- arm_mean_f32(pSrc, blockSize, &fMean);
-
- /* Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
- while (blkCnt > 0U)
- {
-
- vecSrc = vldrwq_f32(pSrc);
- /*
- * sum lanes
- */
- vecSrc = vsubq(vecSrc, fMean);
- sumVec = vfmaq(sumVec, vecSrc, vecSrc);
-
- blkCnt --;
- pSrc += 4;
- }
-
- sum = vecAddAcrossF32Mve(sumVec);
-
- /*
- * tail
- */
- blkCnt = blockSize & 0x3;
- while (blkCnt > 0U)
- {
- in = *pSrc++ - fMean;
- sum += in * in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Variance */
- *pResult = sum / (float32_t) (blockSize - 1);
-}
-#else
-#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_var_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- float32_t mean;
-
- float32_t sum = 0.0f; /* accumulator */
- float32_t in; /* Temporary variable to store input value */
- uint32_t blkCnt; /* loop counter */
-
- float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
- float32x2_t sumV2;
- float32x4_t inV;
- float32x4_t avg;
-
- arm_mean_f32(pSrc,blockSize,&mean);
- avg = vdupq_n_f32(mean);
-
- blkCnt = blockSize >> 2U;
-
- /* Compute 4 outputs at a time.
- ** a second loop below computes the remaining 1 to 3 samples. */
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
- /* Compute Power and then store the result in a temporary variable, sum. */
- inV = vld1q_f32(pSrc);
- inV = vsubq_f32(inV, avg);
- sumV = vmlaq_f32(sumV, inV, inV);
- pSrc += 4;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
- sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
-
- /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
- ** No loop unrolling is used. */
- blkCnt = blockSize % 0x4U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
- /* compute power and then store the result in a temporary variable, sum. */
- in = *pSrc++;
- in = in - mean;
- sum += in * in;
-
- /* Decrement the loop counter */
- blkCnt--;
- }
-
- /* Variance */
- *pResult = sum / (float32_t)(blockSize - 1.0f);
-
-}
-
-#else
-void arm_var_f32(
- const float32_t * pSrc,
- uint32_t blockSize,
- float32_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- float32_t sum = 0.0f; /* Temporary result storage */
- float32_t fSum = 0.0f;
- float32_t fMean, fValue;
- const float32_t * pInput = pSrc;
-
- if (blockSize <= 1U)
- {
- *pResult = 0;
- return;
- }
-
-#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-
- sum += *pInput++;
- sum += *pInput++;
- sum += *pInput++;
- sum += *pInput++;
-
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
-
- sum += *pInput++;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
- fMean = sum / (float32_t) blockSize;
-
- pInput = pSrc;
-
-#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- fValue = *pInput++ - fMean;
- fSum += fValue * fValue;
-
- fValue = *pInput++ - fMean;
- fSum += fValue * fValue;
-
- fValue = *pInput++ - fMean;
- fSum += fValue * fValue;
-
- fValue = *pInput++ - fMean;
- fSum += fValue * fValue;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- fValue = *pInput++ - fMean;
- fSum += fValue * fValue;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Variance */
- *pResult = fSum / (float32_t)(blockSize - 1.0f);
-}
-#endif /* #if defined(ARM_MATH_NEON) */
-#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
-
-/**
- @} end of variance group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_var_f32.c
+ * Description: Variance of the elements of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup variance Variance
+
+ Calculates the variance of the elements in the input vector.
+ The underlying algorithm used is the direct method sometimes referred to as the two-pass method:
+
+ <pre>
+ Result = sum((element - meanOfElements)^2) / (numElement - 1)
+
+ meanOfElements = ( pSrc[0] + pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize
+ </pre>
+
+ There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ @addtogroup variance
+ @{
+ */
+
+/**
+ @brief Variance of the elements of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult variance value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON_EXPERIMENTAL)
+void arm_var_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult)
+{
+  float32_t mean;                         /* Mean of the input, obtained from arm_mean_f32 */
+
+  float32_t sum = 0.0f;                   /* Sum of squared deviations from the mean */
+  float32_t in;                           /* Temporary variable to store input value */
+  uint32_t blkCnt;                        /* loop counter */
+
+  float32x4_t sumV = vdupq_n_f32(0.0f);   /* Four partial sums of squared deviations */
+  float32x2_t sumV2;                      /* Pairwise-added partial sums */
+  float32x4_t inV;                        /* Four input samples, then their deviations */
+  float32x4_t avg;                        /* Mean replicated into every lane */
+
+  /* The (blockSize - 1) divisor needs at least two samples. Without this
+     guard a single sample evaluates 0 / 0; the scalar implementation of this
+     function returns 0 for the same inputs. */
+  if (blockSize <= 1U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  arm_mean_f32(pSrc,blockSize,&mean);
+  avg = vdupq_n_f32(mean);
+
+  blkCnt = blockSize >> 2U;
+
+  /* Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while (blkCnt > 0U)
+  {
+    /* C = (A[0] - mean)^2 + (A[1] - mean)^2 + ... + (A[blockSize-1] - mean)^2 */
+    /* Accumulate the squared deviations lane by lane in sumV. */
+    inV = vld1q_f32(pSrc);
+    inV = vsubq_f32(inV, avg);
+    sumV = vmlaq_f32(sumV, inV, inV);
+    pSrc += 4;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Reduce the four lanes to one scalar. Lanes are read with vget_lane_f32
+     rather than by subscripting the vector, which is a compiler extension. */
+  sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
+  sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4U;
+
+  while (blkCnt > 0U)
+  {
+    /* Add the squared deviation of one remaining sample to sum. */
+    in = *pSrc++;
+    in = in - mean;
+    sum += in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Variance */
+  *pResult = sum / (float32_t)(blockSize - 1.0f);
+
+}
+
+#else
+void arm_var_f32(
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult)
+{
+  const float32_t *pCur = pSrc;   /* Read cursor over the input vector */
+  float32_t total = 0.0f;         /* Running sum of the samples (first pass) */
+  float32_t devSq = 0.0f;         /* Running sum of squared deviations (second pass) */
+  float32_t avg;                  /* Mean of the samples */
+  float32_t delta;                /* Deviation of one sample from the mean */
+  uint32_t remaining;             /* Iterations left in the current loop */
+
+  /* Fewer than two samples: store 0 and stop. */
+  if (blockSize <= 1U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  /* ---- First pass: sum of all samples ---- */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+  /* Unrolled part: four samples per iteration, added in input order */
+  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
+  {
+    total += *pCur++;
+    total += *pCur++;
+    total += *pCur++;
+    total += *pCur++;
+  }
+
+  /* Samples left over after the unrolled part */
+  remaining = blockSize % 0x4U;
+
+#else
+
+  /* No unrolling: every sample is handled by the loop below */
+  remaining = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  for (; remaining > 0U; remaining--)
+  {
+    total += *pCur++;
+  }
+
+  /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
+  avg = total / (float32_t) blockSize;
+
+  /* ---- Second pass: sum of squared deviations from the mean ---- */
+
+  pCur = pSrc;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+  /* Unrolled part: four samples per iteration, accumulated in input order */
+  for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
+  {
+    delta = *pCur++ - avg;
+    devSq += delta * delta;
+
+    delta = *pCur++ - avg;
+    devSq += delta * delta;
+
+    delta = *pCur++ - avg;
+    devSq += delta * delta;
+
+    delta = *pCur++ - avg;
+    devSq += delta * delta;
+  }
+
+  /* Samples left over after the unrolled part */
+  remaining = blockSize % 0x4U;
+
+#else
+
+  /* No unrolling: every sample is handled by the loop below */
+  remaining = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  for (; remaining > 0U; remaining--)
+  {
+    delta = *pCur++ - avg;
+    devSq += delta * delta;
+  }
+
+  /* Variance: sum of squared deviations divided by (blockSize - 1) */
+  *pResult = devSq / (float32_t)(blockSize - 1.0f);
+}
+#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
+
+/**
+ @} end of variance group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c
index f020c88..4218522 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c
@@ -1,230 +1,164 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_var_q15.c
- * Description: Variance of an array of Q15 type
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup variance
- @{
- */
-
-/**
- @brief Variance of the elements of a Q15 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult variance value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using a 64-bit internal accumulator.
- The input is represented in 1.15 format.
- Intermediate multiplication yields a 2.30 format, and this
- result is added without saturation to a 64-bit accumulator in 34.30 format.
- With 33 guard bits in the accumulator, there is no risk of overflow, and the
- full precision of the intermediate multiplication is preserved.
- Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
- 15 bits, and then saturated to yield a result in 1.15 format.
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_var_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- q15x8_t vecSrc;
- q63_t sumOfSquares = 0LL;
- q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
- q63_t sum = 0LL;
- q15_t in;
-
- if (blockSize <= 1U) {
- *pResult = 0;
- return;
- }
-
-
- blkCnt = blockSize >> 3;
- while (blkCnt > 0U)
- {
- vecSrc = vldrhq_s16(pSrc);
- /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
- /* Compute Sum of squares of the input samples
- * and then store the result in a temporary variable, sumOfSquares. */
-
- sumOfSquares = vmlaldavaq_s16(sumOfSquares, vecSrc, vecSrc);
- sum = vaddvaq_s16(sum, vecSrc);
-
- blkCnt --;
- pSrc += 8;
- }
-
- /* Tail */
- blkCnt = blockSize & 7;
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
- /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
- in = *pSrc++;
- /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
-#if defined (ARM_MATH_DSP)
- sumOfSquares = __SMLALD(in, in, sumOfSquares);
-#else
- sumOfSquares += (in * in);
-#endif /* #if defined (ARM_MATH_DSP) */
- /* Compute sum and store result in a temporary variable, sum. */
- sum += in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Compute Mean of squares of the input samples
- * and then store the result in a temporary variable, meanOfSquares. */
- meanOfSquares = arm_div_q63_to_q31(sumOfSquares, (blockSize - 1U));
-
- /* Compute square of mean */
- squareOfMean = arm_div_q63_to_q31((q63_t)sum * sum, (q31_t)(blockSize * (blockSize - 1U)));
-
- /* mean of the squares minus the square of the mean. */
- *pResult = (meanOfSquares - squareOfMean) >> 15;
-}
-#else
-void arm_var_q15(
- const q15_t * pSrc,
- uint32_t blockSize,
- q15_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q31_t sum = 0; /* Accumulator */
- q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
- q63_t sumOfSquares = 0; /* Sum of squares */
- q15_t in; /* Temporary variable to store input value */
-
-#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
- q31_t in32; /* Temporary variable to store input value */
-#endif
-
- if (blockSize <= 1U)
- {
- *pResult = 0;
- return;
- }
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
- /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
- /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
- /* Compute sum and store result in a temporary variable, sum. */
-#if defined (ARM_MATH_DSP)
- in32 = read_q15x2_ia (&pSrc);
- sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
- sum += ((in32 << 16U) >> 16U);
- sum += (in32 >> 16U);
-
- in32 = read_q15x2_ia (&pSrc);
- sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
- sum += ((in32 << 16U) >> 16U);
- sum += (in32 >> 16U);
-#else
- in = *pSrc++;
- sumOfSquares += (in * in);
- sum += in;
-
- in = *pSrc++;
- sumOfSquares += (in * in);
- sum += in;
-
- in = *pSrc++;
- sumOfSquares += (in * in);
- sum += in;
-
- in = *pSrc++;
- sumOfSquares += (in * in);
- sum += in;
-#endif /* #if defined (ARM_MATH_DSP) */
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
- /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
- in = *pSrc++;
- /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
-#if defined (ARM_MATH_DSP)
- sumOfSquares = __SMLALD(in, in, sumOfSquares);
-#else
- sumOfSquares += (in * in);
-#endif /* #if defined (ARM_MATH_DSP) */
- /* Compute sum and store result in a temporary variable, sum. */
- sum += in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
- meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U));
-
- /* Compute square of mean */
- squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
-
- /* mean of squares minus the square of mean. */
- *pResult = (meanOfSquares - squareOfMean) >> 15U;
-}
-#endif /* defined(ARM_MATH_MVEI) */
-
-/**
- @} end of variance group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_var_q15.c
+ * Description: Variance of an array of Q15 type
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup variance
+ @{
+ */
+
+/**
+  @brief         Variance of the elements of a Q15 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector; when it is 0 or 1 the result is 0
+  @param[out]    pResult    variance value returned here
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function is implemented using a 64-bit internal accumulator.
+                   The input is represented in 1.15 format.
+                   Intermediate multiplication yields a 2.30 format, and this
+                   result is added without saturation to a 64-bit accumulator in 34.30 format.
+                   With 33 guard bits in the accumulator, there is no risk of overflow, and the
+                   full precision of the intermediate multiplication is preserved.
+                   Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+                   15 bits and stored as a 1.15 value; no saturation is applied to that final store.
+ */
+
+void arm_var_q15(
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult)
+{
+  uint32_t blkCnt;                    /* Loop counter */
+  q31_t sum = 0;                      /* Sum of the samples (32-bit accumulator) */
+  q31_t meanOfSquares, squareOfMean;  /* Sum of squares / (N - 1) and squared sum / (N * (N - 1)) */
+  q63_t sumOfSquares = 0;             /* Sum of squares */
+  q15_t in;                           /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
+  q31_t in32;                         /* Two input samples packed into one 32-bit word */
+#endif
+  /* Fewer than two samples: return 0 rather than divide by (blockSize - 1) */
+  if (blockSize <= 1U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+    /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+    /* Compute sum and store result in a temporary variable, sum. */
+#if defined (ARM_MATH_DSP)
+    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    sumOfSquares = __SMLALD(in32, in32, sumOfSquares);   /* both halfwords squared and accumulated */
+    sum += ((in32 << 16U) >> 16U);                       /* lower halfword */
+    sum += (in32 >> 16U);                                /* upper halfword */
+
+    in32 = read_q15x2_ia ((q15_t **) &pSrc);
+    sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
+    sum += ((in32 << 16U) >> 16U);
+    sum += (in32 >> 16U);
+#else
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+    sum += in;
+
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+    sum += in;
+
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+    sum += in;
+
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+    sum += in;
+#endif /* #if defined (ARM_MATH_DSP) */
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+    in = *pSrc++;
+    /* Accumulate in * in. __SMLALD also multiplies the upper halfwords, so the sample is zero-extended to keep the sign extension of a negative value out of the sum. */
+#if defined (ARM_MATH_DSP)
+    sumOfSquares = __SMLALD((uint16_t) in, (uint16_t) in, sumOfSquares);
+#else
+    sumOfSquares += (in * in);
+#endif /* #if defined (ARM_MATH_DSP) */
+    /* Compute sum and store result in a temporary variable, sum. */
+    sum += in;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Sum of squares divided by (blockSize - 1), stored in meanOfSquares. */
+  meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U));
+
+  /* Squared sum divided by blockSize * (blockSize - 1); the divisor is formed in 64 bits because the 32-bit product wraps once blockSize exceeds 65536. */
+  squareOfMean = (q31_t) ((q63_t) sum * sum / ((q63_t) blockSize * (q63_t)(blockSize - 1U)));
+
+  /* Difference of the two terms, shifted right by 15 bits to return to 1.15 format. */
+  *pResult = (meanOfSquares - squareOfMean) >> 15U;
+}
+
+/**
+ @} end of variance group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c
index 45b6b66..6b552db 100644
--- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c
+++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c
@@ -1,214 +1,147 @@
-/* ----------------------------------------------------------------------
- * Project: CMSIS DSP Library
- * Title: arm_var_q31.c
- * Description: Variance of an array of Q31 type
- *
- * $Date: 23 April 2021
- * $Revision: V1.9.0
- *
- * Target Processor: Cortex-M and Cortex-A cores
- * -------------------------------------------------------------------- */
-/*
- * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the License); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an AS IS BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dsp/statistics_functions.h"
-
-/**
- @ingroup groupStats
- */
-
-/**
- @addtogroup variance
- @{
- */
-
-/**
- @brief Variance of the elements of a Q31 vector.
- @param[in] pSrc points to the input vector
- @param[in] blockSize number of samples in input vector
- @param[out] pResult variance value returned here
- @return none
-
- @par Scaling and Overflow Behavior
- The function is implemented using an internal 64-bit accumulator.
- The input is represented in 1.31 format, which is then downshifted by 8 bits
- which yields 1.23, and intermediate multiplication yields a 2.46 format.
- The accumulator maintains full precision of the intermediate multiplication results,
- and as a consequence has only 16 guard bits.
- There is no saturation on intermediate additions.
- If the accumulator overflows it wraps around and distorts the result.
- In order to avoid overflows completely the input signal must be scaled down by
- log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
- After division, internal variables should be Q18.46
- Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
- */
-#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
-void arm_var_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult)
-{
- uint32_t blkCnt; /* loop counters */
- q31x4_t vecSrc;
- q63_t sumOfSquares = 0LL;
- q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
- q63_t sum = 0LL;
- q31_t in;
-
- if (blockSize <= 1U) {
- *pResult = 0;
- return;
- }
-
-
- /* Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
- while (blkCnt > 0U)
- {
- vecSrc = vldrwq_s32(pSrc);
- /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
- /* Compute Sum of squares of the input samples
- * and then store the result in a temporary variable, sumOfSquares. */
-
- /* downscale */
- vecSrc = vshrq(vecSrc, 8);
- sumOfSquares = vmlaldavaq(sumOfSquares, vecSrc, vecSrc);
- sum = vaddlvaq(sum, vecSrc);
-
- blkCnt --;
- pSrc += 4;
- }
-
-
- /*
- * tail
- */
- blkCnt = blockSize & 0x3;
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
- /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
- in = *pSrc++ >> 8U;
- /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
- sumOfSquares += ((q63_t) (in) * (in));
- /* Compute sum and store result in a temporary variable, sum. */
- sum += in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Compute Mean of squares of the input samples
- * and then store the result in a temporary variable, meanOfSquares. */
- meanOfSquares = sumOfSquares / (q63_t) (blockSize - 1U);
-
- /* Compute square of mean */
- squareOfMean = sum * sum / (q63_t) (blockSize * (blockSize - 1U));
-
- /* Compute standard deviation and then store the result to the destination */
- *pResult = asrl(meanOfSquares - squareOfMean, 15U);
-}
-#else
-void arm_var_q31(
- const q31_t * pSrc,
- uint32_t blockSize,
- q31_t * pResult)
-{
- uint32_t blkCnt; /* Loop counter */
- q63_t sum = 0; /* Temporary result storage */
- q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
- q63_t sumOfSquares = 0; /* Sum of squares */
- q31_t in; /* Temporary variable to store input value */
-
- if (blockSize <= 1U)
- {
- *pResult = 0;
- return;
- }
-
-#if defined (ARM_MATH_LOOPUNROLL)
-
- /* Loop unrolling: Compute 4 outputs at a time */
- blkCnt = blockSize >> 2U;
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
- /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
- in = *pSrc++ >> 8U;
- /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
- sumOfSquares += ((q63_t) (in) * (in));
- /* Compute sum and store result in a temporary variable, sum. */
- sum += in;
-
- in = *pSrc++ >> 8U;
- sumOfSquares += ((q63_t) (in) * (in));
- sum += in;
-
- in = *pSrc++ >> 8U;
- sumOfSquares += ((q63_t) (in) * (in));
- sum += in;
-
- in = *pSrc++ >> 8U;
- sumOfSquares += ((q63_t) (in) * (in));
- sum += in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Loop unrolling: Compute remaining outputs */
- blkCnt = blockSize % 0x4U;
-
-#else
-
- /* Initialize blkCnt with number of samples */
- blkCnt = blockSize;
-
-#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
-
- while (blkCnt > 0U)
- {
- /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
- /* C = A[0] + A[1] + ... + A[blockSize-1] */
-
- in = *pSrc++ >> 8U;
- /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
- sumOfSquares += ((q63_t) (in) * (in));
- /* Compute sum and store result in a temporary variable, sum. */
- sum += in;
-
- /* Decrement loop counter */
- blkCnt--;
- }
-
- /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
- meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U));
-
- /* Compute square of mean */
- squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
-
- /* Compute variance and store result in destination */
- *pResult = (meanOfSquares - squareOfMean) >> 15U;
-}
-#endif
-/**
- @} end of variance group
- */
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_var_q31.c
+ * Description: Variance of an array of Q31 type
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup variance
+ @{
+ */
+
+/**
+  @brief         Variance of the elements of a Q31 vector.
+  @param[in]     pSrc       points to the input vector
+  @param[in]     blockSize  number of samples in input vector; when it is 0 or 1 the result is 0
+  @param[out]    pResult    variance value returned here
+  @return        none
+
+  @par           Scaling and Overflow Behavior
+                   The function is implemented using an internal 64-bit accumulator.
+                   The input is represented in 1.31 format, which is then downshifted by 8 bits
+                   which yields 1.23, and intermediate multiplication yields a 2.46 format.
+                   The accumulator maintains full precision of the intermediate multiplication results,
+                   but provides only 16 guard bits.
+                   There is no saturation on intermediate additions.
+                   If the accumulator overflows it wraps around and distorts the result.
+                   In order to avoid overflows completely the input signal must be scaled down by
+                   log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
+                   After division, internal variables should be Q18.46
+                   Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
+ */
+
+void arm_var_q31(
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult)
+{
+  uint32_t blkCnt;                    /* Loop counter */
+  q63_t sum = 0;                      /* Sum of the downshifted samples */
+  q63_t meanOfSquares, squareOfMean;  /* Sum of squares / (N - 1) and squared sum / (N * (N - 1)) */
+  q63_t sumOfSquares = 0;             /* Sum of squares */
+  q31_t in;                           /* Input sample after the 8-bit downshift */
+  /* Fewer than two samples: return 0 rather than divide by (blockSize - 1) */
+  if (blockSize <= 1U)
+  {
+    *pResult = 0;
+    return;
+  }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+  /* Loop unrolling: Compute 4 outputs at a time */
+  blkCnt = blockSize >> 2U;
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+    in = *pSrc++ >> 8U;               /* 1.31 -> 1.23 */
+    /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+    sumOfSquares += ((q63_t) (in) * (in));
+    /* Compute sum and store result in a temporary variable, sum. */
+    sum += in;
+
+    in = *pSrc++ >> 8U;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    in = *pSrc++ >> 8U;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    in = *pSrc++ >> 8U;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Loop unrolling: Compute remaining outputs */
+  blkCnt = blockSize % 0x4U;
+
+#else
+
+  /* Initialize blkCnt with number of samples */
+  blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+  while (blkCnt > 0U)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+    in = *pSrc++ >> 8U;               /* 1.31 -> 1.23 */
+    /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+    sumOfSquares += ((q63_t) (in) * (in));
+    /* Compute sum and store result in a temporary variable, sum. */
+    sum += in;
+
+    /* Decrement loop counter */
+    blkCnt--;
+  }
+
+  /* Sum of squares divided by (blockSize - 1), stored in meanOfSquares. */
+  meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U));
+
+  /* Squared sum divided by blockSize * (blockSize - 1); the divisor is formed in 64 bits because the 32-bit product wraps once blockSize exceeds 65536. */
+  squareOfMean = (sum * sum / ((q63_t) blockSize * (q63_t)(blockSize - 1U)));
+
+  /* Compute variance and store result in destination */
+  *pResult = (meanOfSquares - squareOfMean) >> 15U;
+}
+
+/**
+ @} end of variance group
+ */