diff options
author | Clyne Sullivan <clyne@bitgloo.com> | 2025-02-02 11:26:53 -0500 |
---|---|---|
committer | Clyne Sullivan <clyne@bitgloo.com> | 2025-02-02 11:26:53 -0500 |
commit | 9c59a184dba820975e5da6fcd5d248aee87f7e2f (patch) | |
tree | 6b30516adc2ba0f7b0a8f5fb5d2e6966c03108d8 /Drivers/CMSIS/DSP/Source/StatisticsFunctions | |
parent | d09f4289b5788d6a8b34e424841292e2b8529e56 (diff) |
add l476 implementationl476
Diffstat (limited to 'Drivers/CMSIS/DSP/Source/StatisticsFunctions')
27 files changed, 4040 insertions, 5335 deletions
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt index 480985f..1a56e61 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt @@ -1,117 +1,16 @@ -cmake_minimum_required (VERSION 3.14) - -project(CMSISDSPStatistics) - -include(configLib) -include(configDsp) - - - -add_library(CMSISDSPStatistics STATIC) - -target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_q7.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q7.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mean_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mean_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mean_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mean_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mean_q7.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_q7.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q7.c) -target_sources(CMSISDSPStatistics PRIVATE arm_power_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_power_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_power_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_power_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_power_q7.c) -target_sources(CMSISDSPStatistics PRIVATE arm_rms_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_rms_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_rms_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_std_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_std_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_std_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_std_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_var_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_var_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_var_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_var_q31.c) - -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q7.c) - -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q7.c) - -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q7.c) - -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f64.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q7.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mse_q7.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mse_q15.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mse_q31.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mse_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mse_f32.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mse_f64.c) - -configLib(CMSISDSPStatistics ${ROOT}) -configDsp(CMSISDSPStatistics ${ROOT}) - -### Includes -target_include_directories(CMSISDSPStatistics PUBLIC "${DSP}/Include") - - - -if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16)) -target_sources(CMSISDSPStatistics PRIVATE arm_max_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_mean_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_power_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_rms_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_std_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_var_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f16.c) - -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f16.c) -target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f16.c) -endif()
\ No newline at end of file +cmake_minimum_required (VERSION 3.6)
+
+project(CMSISDSPStatistics)
+
+
+file(GLOB SRC "./*_*.c")
+
+add_library(CMSISDSPStatistics STATIC ${SRC})
+
+configdsp(CMSISDSPStatistics ..)
+
+### Includes
+target_include_directories(CMSISDSPStatistics PUBLIC "${DSP}/../../Include")
+
+
+
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c index 5446992..ae74d02 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c @@ -1,100 +1,53 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: StatisticsFunctions.c - * Description: Combination of all statistics function source files. - * - * $Date: 16. March 2020 - * $Revision: V1.1.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "arm_entropy_f32.c" -#include "arm_entropy_f64.c" -#include "arm_kullback_leibler_f32.c" -#include "arm_kullback_leibler_f64.c" -#include "arm_logsumexp_dot_prod_f32.c" -#include "arm_logsumexp_f32.c" -#include "arm_max_f32.c" -#include "arm_max_f64.c" -#include "arm_max_q15.c" -#include "arm_max_q31.c" -#include "arm_max_q7.c" -#include "arm_max_no_idx_f32.c" -#include "arm_max_no_idx_f64.c" -#include "arm_max_no_idx_q31.c" -#include "arm_max_no_idx_q15.c" -#include "arm_max_no_idx_q7.c" -#include "arm_mean_f32.c" -#include "arm_mean_f64.c" -#include "arm_mean_q15.c" -#include "arm_mean_q31.c" -#include "arm_mean_q7.c" -#include "arm_min_f32.c" -#include "arm_min_f64.c" -#include "arm_min_q15.c" -#include "arm_min_q31.c" -#include "arm_min_q7.c" -#include "arm_min_no_idx_f32.c" -#include "arm_min_no_idx_f64.c" -#include "arm_min_no_idx_q31.c" -#include "arm_min_no_idx_q15.c" -#include "arm_min_no_idx_q7.c" -#include "arm_power_f32.c" -#include "arm_power_f64.c" -#include "arm_power_q15.c" -#include "arm_power_q31.c" -#include "arm_power_q7.c" -#include "arm_rms_f32.c" -#include "arm_rms_q15.c" -#include "arm_rms_q31.c" -#include "arm_std_f32.c" -#include "arm_std_f64.c" -#include "arm_std_q15.c" -#include "arm_std_q31.c" -#include "arm_var_f32.c" -#include "arm_var_f64.c" -#include "arm_var_q15.c" -#include "arm_var_q31.c" -#include "arm_absmax_f32.c" -#include "arm_absmax_f64.c" -#include "arm_absmax_q15.c" -#include "arm_absmax_q31.c" -#include "arm_absmax_q7.c" -#include "arm_absmin_f32.c" -#include "arm_absmin_f64.c" -#include "arm_absmin_q15.c" -#include "arm_absmin_q31.c" -#include "arm_absmin_q7.c" -#include "arm_absmax_no_idx_f32.c" -#include "arm_absmax_no_idx_f64.c" -#include "arm_absmax_no_idx_q15.c" -#include "arm_absmax_no_idx_q31.c" -#include "arm_absmax_no_idx_q7.c" -#include "arm_absmin_no_idx_f32.c" -#include "arm_absmin_no_idx_f64.c" -#include "arm_absmin_no_idx_q15.c" -#include "arm_absmin_no_idx_q31.c" -#include "arm_absmin_no_idx_q7.c" -#include "arm_mse_q7.c" -#include "arm_mse_q15.c" -#include "arm_mse_q31.c" -#include "arm_mse_f32.c" -#include "arm_mse_f64.c" +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: StatisticsFunctions.c
+ * Description: Combination of all statistics function source files.
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.0.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_max_f32.c"
+#include "arm_max_q15.c"
+#include "arm_max_q31.c"
+#include "arm_max_q7.c"
+#include "arm_mean_f32.c"
+#include "arm_mean_q15.c"
+#include "arm_mean_q31.c"
+#include "arm_mean_q7.c"
+#include "arm_min_f32.c"
+#include "arm_min_q15.c"
+#include "arm_min_q31.c"
+#include "arm_min_q7.c"
+#include "arm_power_f32.c"
+#include "arm_power_q15.c"
+#include "arm_power_q31.c"
+#include "arm_power_q7.c"
+#include "arm_rms_f32.c"
+#include "arm_rms_q15.c"
+#include "arm_rms_q31.c"
+#include "arm_std_f32.c"
+#include "arm_std_q15.c"
+#include "arm_std_q31.c"
+#include "arm_var_f32.c"
+#include "arm_var_q15.c"
+#include "arm_var_q31.c"
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c index d82b039..f6ffafe 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c @@ -1,365 +1,271 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_max_f32.c - * Description: Maximum value of a floating-point vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" -#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) -#include <limits.h> -#endif - -/** - @ingroup groupStats - */ - -/** - @defgroup Max Maximum - - Computes the maximum value of an array of data. - The function returns both the maximum value and its position within the array. - There are separate functions for floating-point, Q31, Q15, and Q7 data types. - */ - -/** - @addtogroup Max - @{ - */ - -/** - @brief Maximum value of a floating-point vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult maximum value returned here - @param[out] pIndex index of maximum value returned here - @return none - */ - -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_max_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex) -{ - uint32_t blkCnt; - f32x4_t vecSrc; - f32x4_t curExtremValVec = vdupq_n_f32(F32_MIN); - float32_t maxValue = F32_MIN; - uint32_t idx = blockSize; - uint32x4_t indexVec; - uint32x4_t curExtremIdxVec; - uint32_t curIdx = 0; - mve_pred16_t p0; - float32_t tmp; - - - indexVec = vidupq_wb_u32(&curIdx, 1); - curExtremIdxVec = vdupq_n_u32(0); - - /* Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - while (blkCnt > 0U) - { - vecSrc = vldrwq_f32(pSrc); - /* - * Get current max per lane and current index per lane - * when a max is selected - */ - p0 = vcmpgeq(vecSrc, curExtremValVec); - curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); - curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); - - indexVec = vidupq_wb_u32(&curIdx, 1); - - pSrc += 4; - /* Decrement the loop counter */ - blkCnt--; - } - - - /* - * Get max value across the vector - */ - maxValue = vmaxnmvq(maxValue, curExtremValVec); - /* - * set index for lower values to max possible index - */ - p0 = vcmpgeq(curExtremValVec, maxValue); - indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0); - /* - * Get min index which is thus for a max value - */ - idx = vminvq(idx, indexVec); - - /* Tail */ - blkCnt = blockSize & 0x3; - - while (blkCnt > 0U) - { - /* Initialize tmp to the next consecutive values one by one */ - tmp = *pSrc++; - - /* compare for the maximum value */ - if (maxValue < tmp) - { - /* Update the maximum value and it's index */ - maxValue = tmp; - idx = blockSize - blkCnt; - } - - /* Decrement loop counter */ - blkCnt--; - } - - /* - * Save result - */ - *pIndex = idx; - *pResult = maxValue; -} - -#else -#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_max_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex) -{ - float32_t maxVal1, out; /* Temporary variables to store the output value. */ - uint32_t blkCnt, outIndex; /* loop counter */ - - float32x4_t outV, srcV; - float32x2_t outV2; - - uint32x4_t idxV; - uint32x4_t maxIdx; - static const uint32_t indexInit[4]={4,5,6,7}; - static const uint32_t countVInit[4]={0,1,2,3}; - - uint32x4_t index; - uint32x4_t delta; - uint32x4_t countV; - uint32x2_t countV2; - - maxIdx = vdupq_n_u32(ULONG_MAX); - delta = vdupq_n_u32(4); - index = vld1q_u32(indexInit); - countV = vld1q_u32(countVInit); - - - /* Initialise the index value to zero. */ - outIndex = 0U; - - /* Load first input value that act as reference value for comparison */ - if (blockSize <= 3) - { - out = *pSrc++; - - blkCnt = blockSize - 1; - - while (blkCnt > 0U) - { - /* Initialize maxVal to the next consecutive values one by one */ - maxVal1 = *pSrc++; - - /* compare for the maximum value */ - if (out < maxVal1) - { - /* Update the maximum value and it's index */ - out = maxVal1; - outIndex = blockSize - blkCnt; - } - - /* Decrement the loop counter */ - blkCnt--; - } - } - else - { - outV = vld1q_f32(pSrc); - pSrc += 4; - - /* Compute 4 outputs at a time */ - blkCnt = (blockSize - 4 ) >> 2U; - - while (blkCnt > 0U) - { - srcV = vld1q_f32(pSrc); - pSrc += 4; - - idxV = vcgtq_f32(srcV, outV); - outV = vbslq_f32(idxV, srcV, outV ); - countV = vbslq_u32(idxV, index,countV ); - - index = vaddq_u32(index,delta); - - /* Decrement the loop counter */ - blkCnt--; - } - - outV2 = vpmax_f32(vget_low_f32(outV),vget_high_f32(outV)); - outV2 = vpmax_f32(outV2,outV2); - out = vget_lane_f32(outV2, 0); - - idxV = vceqq_f32(outV, vdupq_n_f32(out)); - countV = vbslq_u32(idxV, countV,maxIdx); - - countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV)); - countV2 = vpmin_u32(countV2,countV2); - outIndex = vget_lane_u32(countV2,0); - - /* if (blockSize - 1U) is not multiple of 4 */ - blkCnt = (blockSize - 4 ) % 4U; - - while (blkCnt > 0U) - { - /* Initialize maxVal to the next consecutive values one by one */ - maxVal1 = *pSrc++; - - /* compare for the maximum value */ - if (out < maxVal1) - { - /* Update the maximum value and it's index */ - out = maxVal1; - outIndex = blockSize - blkCnt ; - } - - /* Decrement the loop counter */ - blkCnt--; - } - - - } - - /* Store the maximum value and it's index into destination pointers */ - *pResult = out; - *pIndex = outIndex; -} -#else -void arm_max_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex) -{ - float32_t maxVal, out; /* Temporary variables to store the output value. */ - uint32_t blkCnt, outIndex; /* Loop counter */ - -#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) - uint32_t index; /* index of maximum value */ -#endif - - /* Initialise index value to zero. */ - outIndex = 0U; - - /* Load first input value that act as reference value for comparision */ - out = *pSrc++; - -#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) - /* Initialise index of maximum value. */ - index = 0U; - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = (blockSize - 1U) >> 2U; - - while (blkCnt > 0U) - { - /* Initialize maxVal to next consecutive values one by one */ - maxVal = *pSrc++; - - /* compare for the maximum value */ - if (out < maxVal) - { - /* Update the maximum value and it's index */ - out = maxVal; - outIndex = index + 1U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 2U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 3U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 4U; - } - - index += 4U; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = (blockSize - 1U) % 4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = (blockSize - 1U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* Initialize maxVal to the next consecutive values one by one */ - maxVal = *pSrc++; - - /* compare for the maximum value */ - if (out < maxVal) - { - /* Update the maximum value and it's index */ - out = maxVal; - outIndex = blockSize - blkCnt; - } - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store the maximum value and it's index into destination pointers */ - *pResult = out; - *pIndex = outIndex; -} -#endif /* #if defined(ARM_MATH_NEON) */ -#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ - -/** - @} end of Max group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_f32.c
+ * Description: Maximum value of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#if defined(ARM_MATH_NEON)
+#include <limits.h>
+#endif
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup Max Maximum
+
+ Computes the maximum value of an array of data.
+ The function returns both the maximum value and its position within the array.
+ There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ @addtogroup Max
+ @{
+ */
+
+/**
+ @brief Maximum value of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult maximum value returned here
+ @param[out] pIndex index of maximum value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON)
+void arm_max_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult,
+ uint32_t * pIndex)
+{
+ float32_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ float32x4_t outV, srcV;
+ float32x2_t outV2;
+
+ uint32x4_t idxV;
+ uint32x4_t maxIdx={ULONG_MAX,ULONG_MAX,ULONG_MAX,ULONG_MAX};
+ uint32x4_t index={4,5,6,7};
+ uint32x4_t delta={4,4,4,4};
+ uint32x4_t countV={0,1,2,3};
+ uint32x2_t countV2;
+
+ /* Initialise the count value. */
+ count = 0U;
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+
+ /* Load first input value that act as reference value for comparison */
+ if (blockSize <= 3)
+ {
+ out = *pSrc++;
+
+ blkCnt = blockSize - 1;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+ }
+ else
+ {
+ outV = vld1q_f32(pSrc);
+ pSrc += 4;
+
+ /* Compute 4 outputs at a time */
+ blkCnt = (blockSize - 4 ) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ srcV = vld1q_f32(pSrc);
+ pSrc += 4;
+
+ idxV = vcgtq_f32(srcV, outV);
+ outV = vbslq_f32(idxV, srcV, outV );
+ countV = vbslq_u32(idxV, index,countV );
+
+ index = vaddq_u32(index,delta);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ outV2 = vpmax_f32(vget_low_f32(outV),vget_high_f32(outV));
+ outV2 = vpmax_f32(outV2,outV2);
+ out = outV2[0];
+
+ idxV = vceqq_f32(outV, vdupq_n_f32(out));
+ countV = vbslq_u32(idxV, countV,maxIdx);
+
+ countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
+ countV2 = vpmin_u32(countV2,countV2);
+ outIndex = countV2[0];
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 4 ) % 4U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt ;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+#else
+void arm_max_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult,
+ uint32_t * pIndex)
+{
+ float32_t maxVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal;
+ outIndex = index + 1U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 2U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 3U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+/**
+ @} end of Max group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c index 5715e37..dbead54 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c @@ -1,201 +1,148 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_max_q15.c - * Description: Maximum value of a Q15 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup Max - @{ - */ - -/** - @brief Maximum value of a Q15 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult maximum value returned here - @param[out] pIndex index of maximum value returned here - @return none - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" - -void arm_max_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult, - uint32_t * pIndex) -{ - int32_t blkCnt; /* loop counters */ - q15x8_t extremValVec = vdupq_n_s16(Q15_MIN); - q15_t maxValue = Q15_MIN; - uint16x8_t indexVec; - uint16x8_t extremIdxVec; - mve_pred16_t p0; - uint16_t extremIdxArr[8]; - - indexVec = vidupq_u16(0U, 1); - - blkCnt = blockSize; - do { - mve_pred16_t p = vctp16q(blkCnt); - q15x8_t extremIdxVal = vld1q_z_s16(pSrc, p); - /* - * Get current max per lane and current index per lane - * when a max is selected - */ - p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); - - extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); - /* store per-lane extrema indexes */ - vst1q_p_u16(extremIdxArr, indexVec, p0); - - indexVec += 8; - pSrc += 8; - blkCnt -= 8; - } - while (blkCnt > 0); - - - /* Get max value across the vector */ - maxValue = vmaxvq(maxValue, extremValVec); - - /* set index for lower values to max possible index */ - p0 = vcmpgeq(extremValVec, maxValue); - extremIdxVec = vld1q_u16(extremIdxArr); - - indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0); - *pIndex = vminvq(blockSize - 1, indexVec); - *pResult = maxValue; -} - -#else -void arm_max_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult, - uint32_t * pIndex) -{ - q15_t maxVal, out; /* Temporary variables to store the output value. */ - uint32_t blkCnt, outIndex; /* Loop counter */ - -#if defined (ARM_MATH_LOOPUNROLL) - uint32_t index; /* index of maximum value */ -#endif - - /* Initialise index value to zero. */ - outIndex = 0U; - /* Load first input value that act as reference value for comparision */ - out = *pSrc++; - -#if defined (ARM_MATH_LOOPUNROLL) - /* Initialise index of maximum value. */ - index = 0U; - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = (blockSize - 1U) >> 2U; - - while (blkCnt > 0U) - { - /* Initialize maxVal to next consecutive values one by one */ - maxVal = *pSrc++; - - /* compare for the maximum value */ - if (out < maxVal) - { - /* Update the maximum value and it's index */ - out = maxVal; - outIndex = index + 1U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 2U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 3U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 4U; - } - - index += 4U; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = (blockSize - 1U) % 4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = (blockSize - 1U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* Initialize maxVal to the next consecutive values one by one */ - maxVal = *pSrc++; - - /* compare for the maximum value */ - if (out < maxVal) - { - /* Update the maximum value and it's index */ - out = maxVal; - outIndex = blockSize - blkCnt; - } - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store the maximum value and it's index into destination pointers */ - *pResult = out; - *pIndex = outIndex; -} -#endif /* defined(ARM_MATH_MVEI) */ -/** - @} end of Max group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_q15.c
+ * Description: Maximum value of a Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup Max
+ @{
+ */
+
+/**
+ @brief Maximum value of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult maximum value returned here
+ @param[out] pIndex index of maximum value returned here
+ @return none
+ */
+
+void arm_max_q15(
+ const q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult,
+ uint32_t * pIndex)
+{
+ q15_t maxVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal;
+ outIndex = index + 1U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 2U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 3U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Max group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c index fed900b..5c6badf 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c @@ -1,202 +1,148 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_max_q31.c - * Description: Maximum value of a Q31 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup Max - @{ - */ - -/** - @brief Maximum value of a Q31 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult maximum value returned here - @param[out] pIndex index of maximum value returned here - @return none - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" - -void arm_max_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex) -{ - int32_t blkCnt; /* loop counters */ - q31x4_t extremValVec = vdupq_n_s32(Q31_MIN); - q31_t maxValue = Q31_MIN; - uint32x4_t indexVec; - uint32x4_t extremIdxVec; - mve_pred16_t p0; - uint32_t extremIdxArr[4]; - - indexVec = vidupq_u32(0U, 1); - - blkCnt = blockSize; - do { - mve_pred16_t p = vctp32q(blkCnt); - q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p); - /* - * Get current max per lane and current index per lane - * when a max is selected - */ - p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); - - extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); - /* store per-lane extrema indexes */ - vst1q_p_u32(extremIdxArr, indexVec, p0); - - indexVec += 4; - pSrc += 4; - blkCnt -= 4; - } - while (blkCnt > 0); - - - /* Get max value across the vector */ - maxValue = vmaxvq(maxValue, extremValVec); - - /* set index for lower values to max possible index */ - p0 = vcmpgeq(extremValVec, maxValue); - extremIdxVec = vld1q_u32(extremIdxArr); - - indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0); - *pIndex = vminvq(blockSize - 1, indexVec); - *pResult = maxValue; -} - -#else -void arm_max_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex) -{ - q31_t maxVal, out; /* Temporary variables to store the output value. */ - uint32_t blkCnt, outIndex; /* Loop counter */ - -#if defined (ARM_MATH_LOOPUNROLL) - uint32_t index; /* index of maximum value */ -#endif - - /* Initialise index value to zero. */ - outIndex = 0U; - /* Load first input value that act as reference value for comparision */ - out = *pSrc++; - -#if defined (ARM_MATH_LOOPUNROLL) - /* Initialise index of maximum value. */ - index = 0U; - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = (blockSize - 1U) >> 2U; - - while (blkCnt > 0U) - { - /* Initialize maxVal to next consecutive values one by one */ - maxVal = *pSrc++; - - /* compare for the maximum value */ - if (out < maxVal) - { - /* Update the maximum value and it's index */ - out = maxVal; - outIndex = index + 1U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 2U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 3U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 4U; - } - - index += 4U; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = (blockSize - 1U) % 4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = (blockSize - 1U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* Initialize maxVal to the next consecutive values one by one */ - maxVal = *pSrc++; - - /* compare for the maximum value */ - if (out < maxVal) - { - /* Update the maximum value and it's index */ - out = maxVal; - outIndex = blockSize - blkCnt; - } - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store the maximum value and it's index into destination pointers */ - *pResult = out; - *pIndex = outIndex; -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of Max group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_q31.c
+ * Description: Maximum value of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup Max
+ @{
+ */
+
+/**
+ @brief Maximum value of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult maximum value returned here
+ @param[out] pIndex index of maximum value returned here
+ @return none
+ */
+
+void arm_max_q31(
+ const q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult,
+ uint32_t * pIndex)
+{
+ q31_t maxVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal;
+ outIndex = index + 1U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 2U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 3U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Max group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c index 5deae64..d4977e5 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c @@ -1,256 +1,148 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_max_q7.c - * Description: Maximum value of a Q7 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup Max - @{ - */ - -/** - @brief Maximum value of a Q7 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult maximum value returned here - @param[out] pIndex index of maximum value returned here - @return none - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" - -static void arm_small_blk_max_q7( - const q7_t * pSrc, - uint16_t blockSize, - q7_t * pResult, - uint32_t * pIndex) -{ - int32_t blkCnt; /* loop counters */ - q7x16_t extremValVec = vdupq_n_s8(Q7_MIN); - q7_t maxValue = Q7_MIN; - uint8x16_t indexVec; - uint8x16_t extremIdxVec; - mve_pred16_t p0; - uint8_t extremIdxArr[16]; - - indexVec = vidupq_u8(0U, 1); - - blkCnt = blockSize; - do { - mve_pred16_t p = vctp8q(blkCnt); - q7x16_t extremIdxVal = vld1q_z_s8(pSrc, p); - /* - * Get current max per lane and current index per lane - * when a max is selected - */ - p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); - - extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); - /* store per-lane extrema indexes */ - vst1q_p_u8(extremIdxArr, indexVec, p0); - - indexVec += 16; - pSrc += 16; - blkCnt -= 16; - } - while (blkCnt > 0); - - - /* Get max value across the vector */ - maxValue = vmaxvq(maxValue, extremValVec); - - /* set index for lower values to max possible index */ - p0 = vcmpgeq(extremValVec, maxValue); - extremIdxVec = vld1q_u8(extremIdxArr); - - indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0); - *pIndex = vminvq_u8(blockSize - 1, indexVec); - *pResult = maxValue; -} - -void arm_max_q7( - const q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult, - uint32_t * pIndex) -{ - int32_t totalSize = blockSize; - const uint16_t sub_blk_sz = UINT8_MAX + 1; - - if (totalSize <= sub_blk_sz) - { - arm_small_blk_max_q7(pSrc, blockSize, pResult, pIndex); - } - else - { - uint32_t curIdx = 0; - q7_t curBlkExtr = Q7_MIN; - uint32_t curBlkPos = 0; - uint32_t curBlkIdx = 0; - /* - * process blocks of 255 elts - */ - while (totalSize >= sub_blk_sz) - { - const q7_t *curSrc = pSrc; - - arm_small_blk_max_q7(curSrc, sub_blk_sz, pResult, pIndex); - if (*pResult > curBlkExtr) - { - /* - * update partial extrema - */ - curBlkExtr = *pResult; - curBlkPos = *pIndex; - curBlkIdx = curIdx; - } - curIdx++; - pSrc += sub_blk_sz; - totalSize -= sub_blk_sz; - } - /* - * remainder - */ - arm_small_blk_max_q7(pSrc, totalSize, pResult, pIndex); - if (*pResult > curBlkExtr) - { - curBlkExtr = *pResult; - curBlkPos = *pIndex; - curBlkIdx = curIdx; - } - *pIndex = curBlkIdx * sub_blk_sz + curBlkPos; - *pResult = curBlkExtr; - } -} -#else -void arm_max_q7( - const q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult, - uint32_t * pIndex) -{ - q7_t maxVal, out; /* Temporary variables to store the output value. */ - uint32_t blkCnt, outIndex; /* Loop counter */ - -#if defined (ARM_MATH_LOOPUNROLL) - uint32_t index; /* index of maximum value */ -#endif - - /* Initialise index value to zero. */ - outIndex = 0U; - /* Load first input value that act as reference value for comparision */ - out = *pSrc++; - -#if defined (ARM_MATH_LOOPUNROLL) - /* Initialise index of maximum value. */ - index = 0U; - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = (blockSize - 1U) >> 2U; - - while (blkCnt > 0U) - { - /* Initialize maxVal to next consecutive values one by one */ - maxVal = *pSrc++; - - /* compare for the maximum value */ - if (out < maxVal) - { - /* Update the maximum value and it's index */ - out = maxVal; - outIndex = index + 1U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 2U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 3U; - } - - maxVal = *pSrc++; - if (out < maxVal) - { - out = maxVal; - outIndex = index + 4U; - } - - index += 4U; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = (blockSize - 1U) % 4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = (blockSize - 1U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* Initialize maxVal to the next consecutive values one by one */ - maxVal = *pSrc++; - - /* compare for the maximum value */ - if (out < maxVal) - { - /* Update the maximum value and it's index */ - out = maxVal; - outIndex = blockSize - blkCnt; - } - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store the maximum value and it's index into destination pointers */ - *pResult = out; - *pIndex = outIndex; -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of Max group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_q7.c
+ * Description: Maximum value of a Q7 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup Max
+ @{
+ */
+
+/**
+ @brief Maximum value of a Q7 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult maximum value returned here
+ @param[out] pIndex index of maximum value returned here
+ @return none
+ */
+
+void arm_max_q7(
+ const q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult,
+ uint32_t * pIndex)
+{
+ q7_t maxVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal;
+ outIndex = index + 1U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 2U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 3U;
+ }
+
+ maxVal = *pSrc++;
+ if (out < maxVal)
+ {
+ out = maxVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Max group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c index dd6d817..3373f82 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c @@ -1,198 +1,166 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_mean_f32.c - * Description: Mean value of a floating-point vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - - -/** - @addtogroup mean - @{ - */ - -/** - @brief Mean value of a floating-point vector. - @param[in] pSrc points to the input vector. - @param[in] blockSize number of samples in input vector. - @param[out] pResult mean value returned here. - @return none - */ -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" - -void arm_mean_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - f32x4_t vecSrc; - f32x4_t sumVec = vdupq_n_f32(0.0f); - float32_t sum = 0.0f; - - /* Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - while (blkCnt > 0U) - { - vecSrc = vldrwq_f32(pSrc); - sumVec = vaddq_f32(sumVec, vecSrc); - - blkCnt --; - pSrc += 4; - } - - sum = vecAddAcrossF32Mve(sumVec); - - /* Tail */ - blkCnt = blockSize & 0x3; - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - /* Decrement loop counter */ - blkCnt--; - } - - *pResult = sum / (float32_t) blockSize; -} - - -#else -#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_mean_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - float32_t sum = 0.0f; /* Temporary result storage */ - float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */ - float32x2_t sumV2; - - uint32_t blkCnt; /* Loop counter */ - - float32x4_t inV; - - blkCnt = blockSize >> 2U; - - /* Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - inV = vld1q_f32(pSrc); - sumV = vaddq_f32(sumV, inV); - - pSrc += 4; - /* Decrement the loop counter */ - blkCnt--; - } - - sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV)); - sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1); - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - blkCnt = blockSize & 3; - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - /* Decrement the loop counter */ - blkCnt--; - } - - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ - /* Store the result to the destination */ - *pResult = sum / (float32_t) blockSize; -} -#else -void arm_mean_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - float32_t sum = 0.0f; /* Temporary result storage */ - -#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - sum += *pSrc++; - - sum += *pSrc++; - - sum += *pSrc++; - - /* Decrement the loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - /* Decrement loop counter */ - blkCnt--; - } - - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ - /* Store result to destination */ - *pResult = (sum / blockSize); -} -#endif /* #if defined(ARM_MATH_NEON) */ -#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ - -/** - @} end of mean group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_f32.c
+ * Description: Mean value of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup mean Mean
+
+ Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
+ The underlying algorithm is used:
+
+ <pre>
+ Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
+ </pre>
+
+ There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ @addtogroup mean
+ @{
+ */
+
+/**
+ @brief Mean value of a floating-point vector.
+ @param[in] pSrc points to the input vector.
+ @param[in] blockSize number of samples in input vector.
+ @param[out] pResult mean value returned here.
+ @return none
+ */
+#if defined(ARM_MATH_NEON_EXPERIMENTAL)
+void arm_mean_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* Temporary result storage */
+ float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
+ float32x2_t sumV2;
+
+ uint32_t blkCnt; /* Loop counter */
+
+ float32_t in1, in2, in3, in4;
+ float32x4_t inV;
+
+ blkCnt = blockSize >> 2U;
+
+ /* Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ inV = vld1q_f32(pSrc);
+ sumV = vaddq_f32(sumV, inV);
+
+ pSrc += 4;
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
+ sum = sumV2[0] + sumV2[1];
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize & 3;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store the result to the destination */
+ *pResult = sum / (float32_t) blockSize;
+}
+#else
+void arm_mean_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ float32_t sum = 0.0f; /* Temporary result storage */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ sum += *pSrc++;
+
+ sum += *pSrc++;
+
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store result to destination */
+ *pResult = (sum / blockSize);
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+
+/**
+ @} end of mean group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c index 54949a1..09e6da9 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c @@ -1,156 +1,114 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_mean_q15.c - * Description: Mean value of a Q15 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup mean - @{ - */ - -/** - @brief Mean value of a Q15 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult mean value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using a 32-bit internal accumulator. - The input is represented in 1.15 format and is accumulated in a 32-bit - accumulator in 17.15 format. - There is no risk of internal overflow with this approach, and the - full precision of intermediate result is preserved. - Finally, the accumulator is truncated to yield a result of 1.15 format. - */ - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_mean_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - q15x8_t vecSrc; - q31_t sum = 0L; - - /* Compute 8 outputs at a time */ - blkCnt = blockSize >> 3U; - while (blkCnt > 0U) - { - vecSrc = vldrhq_s16(pSrc); - /* - * sum lanes - */ - sum = vaddvaq(sum, vecSrc); - - blkCnt--; - pSrc += 8; - } - - /* Tail */ - blkCnt = blockSize & 0x7; - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - /* Decrement loop counter */ - blkCnt--; - } - - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ - /* Store the result to the destination */ - *pResult = (q15_t) (sum / (int32_t) blockSize); -} -#else -void arm_mean_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q31_t sum = 0; /* Temporary result storage */ - -#if defined (ARM_MATH_LOOPUNROLL) - q31_t in; -#endif - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - in = read_q15x2_ia (&pSrc); - sum += ((in << 16U) >> 16U); - sum += (in >> 16U); - - in = read_q15x2_ia (&pSrc); - sum += ((in << 16U) >> 16U); - sum += (in >> 16U); - - /* Decrement the loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - /* Decrement loop counter */ - blkCnt--; - } - - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ - /* Store result to destination */ - *pResult = (q15_t) (sum / (int32_t) blockSize); -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of mean group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_q15.c
+ * Description: Mean value of a Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup mean
+ @{
+ */
+
+/**
+ @brief Mean value of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult mean value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 32-bit internal accumulator.
+ The input is represented in 1.15 format and is accumulated in a 32-bit
+ accumulator in 17.15 format.
+ There is no risk of internal overflow with this approach, and the
+ full precision of intermediate result is preserved.
+ Finally, the accumulator is truncated to yield a result of 1.15 format.
+ */
+
+void arm_mean_q15(
+ const q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q31_t sum = 0; /* Temporary result storage */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ q31_t in;
+#endif
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in = read_q15x2_ia ((q15_t **) &pSrc);
+ sum += ((in << 16U) >> 16U);
+ sum += (in >> 16U);
+
+ in = read_q15x2_ia ((q15_t **) &pSrc);
+ sum += ((in << 16U) >> 16U);
+ sum += (in >> 16U);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store result to destination */
+ *pResult = (q15_t) (sum / (int32_t) blockSize);
+}
+
+/**
+ @} end of mean group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c index b33ed00..7667ad8 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c @@ -1,149 +1,110 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_mean_q31.c - * Description: Mean value of a Q31 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup mean - @{ - */ - -/** - @brief Mean value of a Q31 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult mean value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using a 64-bit internal accumulator. - The input is represented in 1.31 format and is accumulated in a 64-bit - accumulator in 33.31 format. - There is no risk of internal overflow with this approach, and the - full precision of intermediate result is preserved. - Finally, the accumulator is truncated to yield a result of 1.31 format. - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_mean_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - q31x4_t vecSrc; - q63_t sum = 0LL; - - - /* Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - while (blkCnt > 0U) - { - - vecSrc = vldrwq_s32(pSrc); - /* - * sum lanes - */ - sum = vaddlvaq(sum, vecSrc); - - blkCnt --; - pSrc += 4; - } - - /* Tail */ - blkCnt = blockSize & 0x3; - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - blkCnt --; - } - - *pResult = arm_div_q63_to_q31(sum, blockSize); -} -#else -void arm_mean_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q63_t sum = 0; /* Temporary result storage */ - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - sum += *pSrc++; - - sum += *pSrc++; - - sum += *pSrc++; - - /* Decrement the loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - /* Decrement loop counter */ - blkCnt--; - } - - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ - /* Store result to destination */ - *pResult = (q31_t) (sum / blockSize); -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of mean group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_q31.c
+ * Description: Mean value of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup mean
+ @{
+ */
+
+/**
+ @brief Mean value of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult mean value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.31 format and is accumulated in a 64-bit
+ accumulator in 33.31 format.
+ There is no risk of internal overflow with this approach, and the
+ full precision of intermediate result is preserved.
+ Finally, the accumulator is truncated to yield a result of 1.31 format.
+ */
+
+void arm_mean_q31(
+ const q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q63_t sum = 0; /* Temporary result storage */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ sum += *pSrc++;
+
+ sum += *pSrc++;
+
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store result to destination */
+ *pResult = (q31_t) (sum / blockSize);
+}
+
+/**
+ @} end of mean group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c index f0701eb..3f90ea8 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c @@ -1,153 +1,112 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_mean_q7.c - * Description: Mean value of a Q7 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup mean - @{ - */ - -/** - @brief Mean value of a Q7 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult mean value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using a 32-bit internal accumulator. - The input is represented in 1.7 format and is accumulated in a 32-bit - accumulator in 25.7 format. - There is no risk of internal overflow with this approach, and the - full precision of intermediate result is preserved. - Finally, the accumulator is truncated to yield a result of 1.7 format. - */ - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -void arm_mean_q7( - const q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - q7x16_t vecSrc; - q31_t sum = 0L; - - - blkCnt = blockSize >> 4; - while (blkCnt > 0U) - { - vecSrc = vldrbq_s8(pSrc); - /* - * sum lanes - */ - sum = vaddvaq(sum, vecSrc); - - blkCnt--; - pSrc += 16; - } - - blkCnt = blockSize & 0xF; - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - /* Decrement loop counter */ - blkCnt--; - } - - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ - /* Store the result to the destination */ - *pResult = (q7_t) (sum / (int32_t) blockSize); -} -#else -void arm_mean_q7( - const q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q31_t sum = 0; /* Temporary result storage */ - -#if defined (ARM_MATH_LOOPUNROLL) - q31_t in; -#endif - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - in = read_q7x4_ia (&pSrc); - sum += ((in << 24U) >> 24U); - sum += ((in << 16U) >> 24U); - sum += ((in << 8U) >> 24U); - sum += (in >> 24U); - - /* Decrement the loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - sum += *pSrc++; - - /* Decrement loop counter */ - blkCnt--; - } - - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ - /* Store result to destination */ - *pResult = (q7_t) (sum / (int32_t) blockSize); -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of mean group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_q7.c
+ * Description: Mean value of a Q7 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup mean
+ @{
+ */
+
+/**
+ @brief Mean value of a Q7 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult mean value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 32-bit internal accumulator.
+ The input is represented in 1.7 format and is accumulated in a 32-bit
+ accumulator in 25.7 format.
+ There is no risk of internal overflow with this approach, and the
+ full precision of intermediate result is preserved.
+ Finally, the accumulator is truncated to yield a result of 1.7 format.
+ */
+
+void arm_mean_q7(
+ const q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q31_t sum = 0; /* Temporary result storage */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ q31_t in;
+#endif
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in = read_q7x4_ia ((q7_t **) &pSrc);
+ sum += ((in << 24U) >> 24U);
+ sum += ((in << 16U) >> 24U);
+ sum += ((in << 8U) >> 24U);
+ sum += (in >> 24U);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store result to destination */
+ *pResult = (q7_t) (sum / (int32_t) blockSize);
+}
+
+/**
+ @} end of mean group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c index ad8a472..f6504aa 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c @@ -1,363 +1,268 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_min_f32.c - * Description: Minimum value of a floating-point vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) -#include <limits.h> -#endif - - -/** - @ingroup groupStats - */ - -/** - @defgroup Min Minimum - - Computes the minimum value of an array of data. - The function returns both the minimum value and its position within the array. - There are separate functions for floating-point, Q31, Q15, and Q7 data types. - */ - -/** - @addtogroup Min - @{ - */ - -/** - @brief Minimum value of a floating-point vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult minimum value returned here - @param[out] pIndex index of minimum value returned here - @return none - */ - -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) - -void arm_min_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex) -{ - uint32_t blkCnt; /* loop counters */ - f32x4_t vecSrc; - float32_t const *pSrcVec; - f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX); - float32_t minValue = F32_MAX; - uint32_t idx = blockSize; - uint32x4_t indexVec; - uint32x4_t curExtremIdxVec; - float32_t tmp; - mve_pred16_t p0; - - indexVec = vidupq_u32((uint32_t)0, 1); - curExtremIdxVec = vdupq_n_u32(0); - - pSrcVec = (float32_t const *) pSrc; - /* Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - while (blkCnt > 0U) - { - vecSrc = vldrwq_f32(pSrcVec); - pSrcVec += 4; - /* - * Get current max per lane and current index per lane - * when a max is selected - */ - p0 = vcmpleq(vecSrc, curExtremValVec); - curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); - curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); - - indexVec = indexVec + 4; - /* - * Decrement the blockSize loop counter - */ - blkCnt--; - } - - /* - * Get min value across the vector - */ - minValue = vminnmvq(minValue, curExtremValVec); - /* - * set index for lower values to max possible index - */ - p0 = vcmpleq(curExtremValVec, minValue); - indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0); - /* - * Get min index which is thus for a max value - */ - idx = vminvq(idx, indexVec); - - /* - * tail - */ - blkCnt = blockSize & 0x3; - - while (blkCnt > 0U) - { - /* Initialize minVal to the next consecutive values one by one */ - tmp = *pSrc++; - - /* compare for the minimum value */ - if (minValue > tmp) - { - /* Update the minimum value and it's index */ - minValue = tmp; - idx = blockSize - blkCnt; - } - blkCnt--; - } - /* - * Save result - */ - *pIndex = idx; - *pResult = minValue; -} - -#else -#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_min_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex) -{ - float32_t maxVal1, out; /* Temporary variables to store the output value. */ - uint32_t blkCnt, outIndex; /* loop counter */ - - float32x4_t outV, srcV; - float32x2_t outV2; - - uint32x4_t idxV; - static const uint32_t indexInit[4]={4,5,6,7}; - static const uint32_t countVInit[4]={0,1,2,3}; - uint32x4_t maxIdx; - uint32x4_t index; - uint32x4_t delta; - uint32x4_t countV; - uint32x2_t countV2; - - maxIdx = vdupq_n_u32(ULONG_MAX); - delta = vdupq_n_u32(4); - index = vld1q_u32(indexInit); - countV = vld1q_u32(countVInit); - - /* Initialise the index value to zero. */ - outIndex = 0U; - - /* Load first input value that act as reference value for comparison */ - if (blockSize <= 3) - { - out = *pSrc++; - - blkCnt = blockSize - 1; - - while (blkCnt > 0U) - { - /* Initialize maxVal to the next consecutive values one by one */ - maxVal1 = *pSrc++; - - /* compare for the maximum value */ - if (out > maxVal1) - { - /* Update the maximum value and it's index */ - out = maxVal1; - outIndex = blockSize - blkCnt; - } - - /* Decrement the loop counter */ - blkCnt--; - } - } - else - { - outV = vld1q_f32(pSrc); - pSrc += 4; - - /* Compute 4 outputs at a time */ - blkCnt = (blockSize - 4 ) >> 2U; - - while (blkCnt > 0U) - { - srcV = vld1q_f32(pSrc); - pSrc += 4; - - idxV = vcltq_f32(srcV, outV); - outV = vbslq_f32(idxV, srcV, outV ); - countV = vbslq_u32(idxV, index,countV ); - - index = vaddq_u32(index,delta); - - /* Decrement the loop counter */ - blkCnt--; - } - - outV2 = vpmin_f32(vget_low_f32(outV),vget_high_f32(outV)); - outV2 = vpmin_f32(outV2,outV2); - out = vget_lane_f32(outV2,0); - - idxV = vceqq_f32(outV, vdupq_n_f32(out)); - countV = vbslq_u32(idxV, countV,maxIdx); - - countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV)); - countV2 = vpmin_u32(countV2,countV2); - outIndex = vget_lane_u32(countV2,0); - - /* if (blockSize - 1U) is not multiple of 4 */ - blkCnt = (blockSize - 4 ) % 4U; - - while (blkCnt > 0U) - { - /* Initialize maxVal to the next consecutive values one by one */ - maxVal1 = *pSrc++; - - /* compare for the maximum value */ - if (out > maxVal1) - { - /* Update the maximum value and it's index */ - out = maxVal1; - outIndex = blockSize - blkCnt ; - } - - /* Decrement the loop counter */ - blkCnt--; - } - } - - /* Store the maximum value and it's index into destination pointers */ - *pResult = out; - *pIndex = outIndex; -} -#else -void arm_min_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult, - uint32_t * pIndex) -{ - float32_t minVal, out; /* Temporary variables to store the output value. */ - uint32_t blkCnt, outIndex; /* Loop counter */ - -#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) - uint32_t index; /* index of maximum value */ -#endif - - /* Initialise index value to zero. */ - outIndex = 0U; - - /* Load first input value that act as reference value for comparision */ - out = *pSrc++; - -#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) - /* Initialise index of maximum value. */ - index = 0U; - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = (blockSize - 1U) >> 2U; - - while (blkCnt > 0U) - { - /* Initialize minVal to next consecutive values one by one */ - minVal = *pSrc++; - - /* compare for the minimum value */ - if (out > minVal) - { - /* Update the minimum value and it's index */ - out = minVal; - outIndex = index + 1U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 2U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 3U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 4U; - } - - index += 4U; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = (blockSize - 1U) % 4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = (blockSize - 1U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* Initialize minVal to the next consecutive values one by one */ - minVal = *pSrc++; - - /* compare for the minimum value */ - if (out > minVal) - { - /* Update the minimum value and it's index */ - out = minVal; - outIndex = blockSize - blkCnt; - } - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store the minimum value and it's index into destination pointers */ - *pResult = out; - *pIndex = outIndex; -} -#endif /* #if defined(ARM_MATH_NEON) */ -#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ - -/** - @} end of Min group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_f32.c
+ * Description: Minimum value of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include <limits.h>
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup Min Minimum
+
+ Computes the minimum value of an array of data.
+ The function returns both the minimum value and its position within the array.
+ There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ @addtogroup Min
+ @{
+ */
+
+/**
+ @brief Minimum value of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult minimum value returned here
+ @param[out] pIndex index of minimum value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON)
+void arm_min_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult,
+ uint32_t * pIndex)
+{
+ float32_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ float32x4_t outV, srcV;
+ float32x2_t outV2;
+
+ uint32x4_t idxV;
+ uint32x4_t maxIdx={ULONG_MAX,ULONG_MAX,ULONG_MAX,ULONG_MAX};
+ uint32x4_t index={4,5,6,7};
+ uint32x4_t delta={4,4,4,4};
+ uint32x4_t countV={0,1,2,3};
+ uint32x2_t countV2;
+
+ /* Initialise the count value. */
+ count = 0U;
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+
+ /* Load first input value that act as reference value for comparison */
+ if (blockSize <= 3)
+ {
+ out = *pSrc++;
+
+ blkCnt = blockSize - 1;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out > maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+ }
+ else
+ {
+ outV = vld1q_f32(pSrc);
+ pSrc += 4;
+
+ /* Compute 4 outputs at a time */
+ blkCnt = (blockSize - 4 ) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ srcV = vld1q_f32(pSrc);
+ pSrc += 4;
+
+ idxV = vcltq_f32(srcV, outV);
+ outV = vbslq_f32(idxV, srcV, outV );
+ countV = vbslq_u32(idxV, index,countV );
+
+ index = vaddq_u32(index,delta);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ outV2 = vpmin_f32(vget_low_f32(outV),vget_high_f32(outV));
+ outV2 = vpmin_f32(outV2,outV2);
+ out = outV2[0];
+
+ idxV = vceqq_f32(outV, vdupq_n_f32(out));
+ countV = vbslq_u32(idxV, countV,maxIdx);
+
+ countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
+ countV2 = vpmin_u32(countV2,countV2);
+ outIndex = countV2[0];
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 4 ) % 4U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out > maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt ;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+#else
+void arm_min_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult,
+ uint32_t * pIndex)
+{
+ float32_t minVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to next consecutive values one by one */
+ minVal = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal;
+ outIndex = index + 1U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 2U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 3U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+
+/**
+ @} end of Min group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c index f31019d..67fd49e 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c @@ -1,203 +1,149 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_min_q15.c - * Description: Minimum value of a Q15 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - - -/** - @addtogroup Min - @{ - */ - -/** - @brief Minimum value of a Q15 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult minimum value returned here - @param[out] pIndex index of minimum value returned here - @return none - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" - -void arm_min_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult, - uint32_t * pIndex) -{ - - int32_t blkCnt; /* loop counters */ - q15x8_t extremValVec = vdupq_n_s16(Q15_MAX); - q15_t minValue = Q15_MAX; - uint16x8_t indexVec; - uint16x8_t extremIdxVec; - mve_pred16_t p0; - uint16_t extremIdxArr[8]; - - indexVec = vidupq_u16(0U, 1); - - blkCnt = blockSize; - do { - mve_pred16_t p = vctp16q(blkCnt); - q15x8_t extremIdxVal = vld1q_z_s16(pSrc, p); - /* - * Get current min per lane and current index per lane - * when a min is selected - */ - p0 = vcmpleq_m(extremIdxVal, extremValVec, p); - - extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); - /* store per-lane extrema indexes */ - vst1q_p_u16(extremIdxArr, indexVec, p0); - - indexVec += 8; - pSrc += 8; - blkCnt -= 8; - } - while (blkCnt > 0); - - /* Get min value across the vector */ - minValue = vminvq(minValue, extremValVec); - - /* set index for lower values to min possible index */ - p0 = vcmpleq(extremValVec, minValue); - extremIdxVec = vld1q_u16(extremIdxArr); - - indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0); - *pIndex = vminvq(blockSize - 1, indexVec); - *pResult = minValue; - -} -#else -void arm_min_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult, - uint32_t * pIndex) -{ - q15_t minVal, out; /* Temporary variables to store the output value. */ - uint32_t blkCnt, outIndex; /* Loop counter */ - -#if defined (ARM_MATH_LOOPUNROLL) - uint32_t index; /* index of maximum value */ -#endif - - /* Initialise index value to zero. */ - outIndex = 0U; - /* Load first input value that act as reference value for comparision */ - out = *pSrc++; - -#if defined (ARM_MATH_LOOPUNROLL) - /* Initialise index of maximum value. */ - index = 0U; - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = (blockSize - 1U) >> 2U; - - while (blkCnt > 0U) - { - /* Initialize minVal to next consecutive values one by one */ - minVal = *pSrc++; - - /* compare for the minimum value */ - if (out > minVal) - { - /* Update the minimum value and it's index */ - out = minVal; - outIndex = index + 1U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 2U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 3U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 4U; - } - - index += 4U; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = (blockSize - 1U) % 4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = (blockSize - 1U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* Initialize minVal to the next consecutive values one by one */ - minVal = *pSrc++; - - /* compare for the minimum value */ - if (out > minVal) - { - /* Update the minimum value and it's index */ - out = minVal; - outIndex = blockSize - blkCnt; - } - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store the minimum value and it's index into destination pointers */ - *pResult = out; - *pIndex = outIndex; -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of Min group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_q15.c
+ * Description: Minimum value of a Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+
+/**
+ @addtogroup Min
+ @{
+ */
+
+/**
+ @brief Minimum value of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult minimum value returned here
+ @param[out] pIndex index of minimum value returned here
+ @return none
+ */
+
+void arm_min_q15(
+ const q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult,
+ uint32_t * pIndex)
+{
+ q15_t minVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to next consecutive values one by one */
+ minVal = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal;
+ outIndex = index + 1U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 2U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 3U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Min group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c index c993004..c362173 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c @@ -1,203 +1,149 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_min_q31.c - * Description: Minimum value of a Q31 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - - -/** - @addtogroup Min - @{ - */ - -/** - @brief Minimum value of a Q31 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult minimum value returned here - @param[out] pIndex index of minimum value returned here - @return none - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" - -void arm_min_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex) -{ - int32_t blkCnt; /* loop counters */ - q31x4_t extremValVec = vdupq_n_s32(Q31_MAX); - q31_t minValue = Q31_MAX; - uint32x4_t indexVec; - uint32x4_t extremIdxVec; - mve_pred16_t p0; - uint32_t extremIdxArr[4]; - - indexVec = vidupq_u32(0U, 1); - - blkCnt = blockSize; - do { - mve_pred16_t p = vctp32q(blkCnt); - q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p); - /* - * Get current min per lane and current index per lane - * when a min is selected - */ - p0 = vcmpleq_m(extremIdxVal, extremValVec, p); - - extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); - /* store per-lane extrema indexes */ - vst1q_p_u32(extremIdxArr, indexVec, p0); - - indexVec += 4; - pSrc += 4; - blkCnt -= 4; - } - while (blkCnt > 0); - - - /* Get min value across the vector */ - minValue = vminvq(minValue, extremValVec); - - /* set index for lower values to min possible index */ - p0 = vcmpleq(extremValVec, minValue); - extremIdxVec = vld1q_u32(extremIdxArr); - - indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0); - *pIndex = vminvq(blockSize - 1, indexVec); - *pResult = minValue; -} - -#else -void arm_min_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult, - uint32_t * pIndex) -{ - q31_t minVal, out; /* Temporary variables to store the output value. */ - uint32_t blkCnt, outIndex; /* Loop counter */ - -#if defined (ARM_MATH_LOOPUNROLL) - uint32_t index; /* index of maximum value */ -#endif - - /* Initialise index value to zero. */ - outIndex = 0U; - /* Load first input value that act as reference value for comparision */ - out = *pSrc++; - -#if defined (ARM_MATH_LOOPUNROLL) - /* Initialise index of maximum value. */ - index = 0U; - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = (blockSize - 1U) >> 2U; - - while (blkCnt > 0U) - { - /* Initialize minVal to next consecutive values one by one */ - minVal = *pSrc++; - - /* compare for the minimum value */ - if (out > minVal) - { - /* Update the minimum value and it's index */ - out = minVal; - outIndex = index + 1U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 2U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 3U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 4U; - } - - index += 4U; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = (blockSize - 1U) % 4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = (blockSize - 1U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* Initialize minVal to the next consecutive values one by one */ - minVal = *pSrc++; - - /* compare for the minimum value */ - if (out > minVal) - { - /* Update the minimum value and it's index */ - out = minVal; - outIndex = blockSize - blkCnt; - } - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store the minimum value and it's index into destination pointers */ - *pResult = out; - *pIndex = outIndex; -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of Min group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_q31.c
+ * Description: Minimum value of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+
+/**
+ @addtogroup Min
+ @{
+ */
+
+/**
+ @brief Minimum value of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult minimum value returned here
+ @param[out] pIndex index of minimum value returned here
+ @return none
+ */
+
+void arm_min_q31(
+ const q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult,
+ uint32_t * pIndex)
+{
+ q31_t minVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to next consecutive values one by one */
+ minVal = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal;
+ outIndex = index + 1U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 2U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 3U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Min group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c index 3e5aae5..4329630 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c @@ -1,284 +1,149 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_min_q7.c - * Description: Minimum value of a Q7 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - - -/** - @addtogroup Min - @{ - */ - -/** - @brief Minimum value of a Q7 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult minimum value returned here - @param[out] pIndex index of minimum value returned here - @return none - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" - -static void arm_small_blk_min_q7( - const q7_t * pSrc, - uint8_t blockSize, - q7_t * pResult, - uint32_t * pIndex) -{ - uint32_t blkCnt; /* loop counters */ - q7x16_t vecSrc; - q7x16_t curExtremValVec = vdupq_n_s8(Q7_MAX); - q7_t minValue = Q7_MAX,temp; - uint32_t idx = blockSize; - uint8x16_t indexVec; - uint8x16_t curExtremIdxVec; - mve_pred16_t p0; - - - indexVec = vidupq_u8((uint32_t)0, 1); - curExtremIdxVec = vdupq_n_u8(0); - - blkCnt = blockSize >> 4; - while (blkCnt > 0U) - { - vecSrc = vldrbq_s8(pSrc); - pSrc += 16; - /* - * Get current min per lane and current index per lane - * when a min is selected - */ - p0 = vcmpleq(vecSrc, curExtremValVec); - curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); - curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); - - indexVec = indexVec + 16; - /* - * Decrement the blockSize loop counter - */ - blkCnt--; - } - - /* - * Get min value across the vector - */ - minValue = vminvq(minValue, curExtremValVec); - /* - * set index for lower values to min possible index - */ - p0 = vcmpleq(curExtremValVec, minValue); - indexVec = vpselq(curExtremIdxVec, vdupq_n_u8(blockSize), p0); - /* - * Get min index which is thus for a min value - */ - idx = vminvq(idx, indexVec); - - blkCnt = blockSize & 0xF; - while (blkCnt > 0U) - { - /* Initialize minVal to the next consecutive values one by one */ - temp = *pSrc++; - - /* compare for the minimum value */ - if (minValue > temp) - { - /* Update the minimum value and it's index */ - minValue = temp; - idx = blockSize - blkCnt; - } - - /* Decrement loop counter */ - blkCnt--; - } - /* - * Save result - */ - *pIndex = idx; - *pResult = minValue; -} - -void arm_min_q7( - const q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult, - uint32_t * pIndex) -{ - int32_t totalSize = blockSize; - - if (totalSize <= UINT8_MAX) - { - arm_small_blk_min_q7(pSrc, blockSize, pResult, pIndex); - } - else - { - uint32_t curIdx = 0; - q7_t curBlkExtr = Q7_MAX; - uint32_t curBlkPos = 0; - uint32_t curBlkIdx = 0; - /* - * process blocks of 255 elts - */ - while (totalSize >= UINT8_MAX) - { - const q7_t *curSrc = pSrc; - - arm_small_blk_min_q7(curSrc, UINT8_MAX, pResult, pIndex); - if (*pResult < curBlkExtr) - { - /* - * update partial extrema - */ - curBlkExtr = *pResult; - curBlkPos = *pIndex; - curBlkIdx = curIdx; - } - curIdx++; - pSrc += UINT8_MAX; - totalSize -= UINT8_MAX; - } - /* - * remainder - */ - arm_small_blk_min_q7(pSrc, totalSize, pResult, pIndex); - if (*pResult < curBlkExtr) - { - curBlkExtr = *pResult; - curBlkPos = *pIndex; - curBlkIdx = curIdx; - } - *pIndex = curBlkIdx * UINT8_MAX + curBlkPos; - *pResult = curBlkExtr; - } -} -#else -void arm_min_q7( - const q7_t * pSrc, - uint32_t blockSize, - q7_t * pResult, - uint32_t * pIndex) -{ - q7_t minVal, out; /* Temporary variables to store the output value. */ - uint32_t blkCnt, outIndex; /* Loop counter */ - -#if defined (ARM_MATH_LOOPUNROLL) - uint32_t index; /* index of maximum value */ -#endif - - /* Initialise index value to zero. */ - outIndex = 0U; - /* Load first input value that act as reference value for comparision */ - out = *pSrc++; - -#if defined (ARM_MATH_LOOPUNROLL) - /* Initialise index of maximum value. */ - index = 0U; - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = (blockSize - 1U) >> 2U; - - while (blkCnt > 0U) - { - /* Initialize minVal to next consecutive values one by one */ - minVal = *pSrc++; - - /* compare for the minimum value */ - if (out > minVal) - { - /* Update the minimum value and it's index */ - out = minVal; - outIndex = index + 1U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 2U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 3U; - } - - minVal = *pSrc++; - if (out > minVal) - { - out = minVal; - outIndex = index + 4U; - } - - index += 4U; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = (blockSize - 1U) % 4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = (blockSize - 1U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* Initialize minVal to the next consecutive values one by one */ - minVal = *pSrc++; - - /* compare for the minimum value */ - if (out > minVal) - { - /* Update the minimum value and it's index */ - out = minVal; - outIndex = blockSize - blkCnt; - } - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store the minimum value and it's index into destination pointers */ - *pResult = out; - *pIndex = outIndex; -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of Min group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_q7.c
+ * Description: Minimum value of a Q7 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+
+/**
+ @addtogroup Min
+ @{
+ */
+
+/**
+ @brief Minimum value of a Q7 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult minimum value returned here
+ @param[out] pIndex index of minimum value returned here
+ @return none
+ */
+
+void arm_min_q7(
+ const q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult,
+ uint32_t * pIndex)
+{
+ q7_t minVal, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* Loop counter */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ uint32_t index; /* index of maximum value */
+#endif
+
+ /* Initialise index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+ /* Initialise index of maximum value. */
+ index = 0U;
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to next consecutive values one by one */
+ minVal = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal;
+ outIndex = index + 1U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 2U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 3U;
+ }
+
+ minVal = *pSrc++;
+ if (out > minVal)
+ {
+ out = minVal;
+ outIndex = index + 4U;
+ }
+
+ index += 4U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ @} end of Min group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c index ce12e57..abf444b 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c @@ -1,229 +1,175 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_power_f32.c - * Description: Sum of the squares of the elements of a floating-point vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @defgroup power Power - - Calculates the sum of the squares of the elements in the input vector. - The underlying algorithm is used: - - <pre> - Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]; - </pre> - - There are separate functions for floating point, Q31, Q15, and Q7 data types. - - Since the result is not divided by the length, those functions are in fact computing - something which is more an energy than a power. - - */ - -/** - @addtogroup power - @{ - */ - -/** - @brief Sum of the squares of the elements of a floating-point vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult sum of the squares value returned here - @return none - */ -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" - -void arm_power_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - f32x4_t vecSrc; - f32x4_t sumVec = vdupq_n_f32(0.0f); - float32_t sum = 0.0f; - float32_t in; - - /* Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - while (blkCnt > 0U) - { - vecSrc = vldrwq_f32(pSrc); - /* - * sum lanes - */ - sumVec = vfmaq(sumVec, vecSrc, vecSrc); - - blkCnt --; - pSrc += 4; - } - sum = vecAddAcrossF32Mve(sumVec); - - /* - * tail - */ - blkCnt = blockSize & 0x3; - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ - in = *pSrc++; - sum += in * in; - - /* Decrement loop counter */ - blkCnt--; - } - - *pResult = sum; -} -#else -#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_power_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - float32_t sum = 0.0f; /* accumulator */ - float32_t in; /* Temporary variable to store input value */ - uint32_t blkCnt; /* loop counter */ - - float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */ - float32x2_t sumV2; - float32x4_t inV; - - blkCnt = blockSize >> 2U; - - /* Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ - /* Compute Power and then store the result in a temporary variable, sum. */ - inV = vld1q_f32(pSrc); - sumV = vmlaq_f32(sumV, inV, inV); - pSrc += 4; - - /* Decrement the loop counter */ - blkCnt--; - } - sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV)); - sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1); - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - blkCnt = blockSize % 0x4U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ - /* compute power and then store the result in a temporary variable, sum. */ - in = *pSrc++; - sum += in * in; - - /* Decrement the loop counter */ - blkCnt--; - } - - /* Store the result to the destination */ - *pResult = sum; -} -#else -void arm_power_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - float32_t sum = 0.0f; /* Temporary result storage */ - float32_t in; /* Temporary variable to store input value */ - -#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ - in = *pSrc++; - sum += in * in; - - in = *pSrc++; - sum += in * in; - - in = *pSrc++; - sum += in * in; - - in = *pSrc++; - sum += in * in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ - in = *pSrc++; - sum += in * in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store result to destination */ - *pResult = sum; -} -#endif /* #if defined(ARM_MATH_NEON) */ -#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ - -/** - @} end of power group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_f32.c
+ * Description: Sum of the squares of the elements of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup power Power
+
+ Calculates the sum of the squares of the elements in the input vector.
+ The underlying algorithm is used:
+
+ <pre>
+ Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];
+ </pre>
+
+ There are separate functions for floating point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ @addtogroup power
+ @{
+ */
+
+/**
+ @brief Sum of the squares of the elements of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult sum of the squares value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON)
+void arm_power_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* accumulator */
+ float32_t in; /* Temporary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+ float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
+ float32x2_t sumV2;
+ float32x4_t inV;
+
+ blkCnt = blockSize >> 2U;
+
+ /* Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ inV = vld1q_f32(pSrc);
+ sumV = vmlaq_f32(sumV, inV, inV);
+ pSrc += 4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+ sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
+ sum = sumV2[0] + sumV2[1];
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* compute power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the result to the destination */
+ *pResult = sum;
+}
+#else
+void arm_power_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ float32_t sum = 0.0f; /* Temporary result storage */
+ float32_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Compute Power and store result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Compute Power and store result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store result to destination */
+ *pResult = sum;
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+
+/**
+ @} end of power group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c index 2c47f0b..62e567f 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c @@ -1,177 +1,132 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_power_q15.c - * Description: Sum of the squares of the elements of a Q15 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup power - @{ - */ - -/** - @brief Sum of the squares of the elements of a Q15 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult sum of the squares value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using a 64-bit internal accumulator. - The input is represented in 1.15 format. - Intermediate multiplication yields a 2.30 format, and this - result is added without saturation to a 64-bit accumulator in 34.30 format. - With 33 guard bits in the accumulator, there is no risk of overflow, and the - full precision of the intermediate multiplication is preserved. - Finally, the return result is in 34.30 format. - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -void arm_power_q15( - const q15_t * pSrc, - uint32_t blockSize, - q63_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - q15x8_t vecSrc; - q63_t sum = 0LL; - q15_t in; - - /* Compute 8 outputs at a time */ - blkCnt = blockSize >> 3U; - while (blkCnt > 0U) - { - vecSrc = vldrhq_s16(pSrc); - /* - * sum lanes - */ - sum = vmlaldavaq(sum, vecSrc, vecSrc); - - blkCnt --; - pSrc += 8; - } - - /* - * tail - */ - blkCnt = blockSize & 0x7; - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ - in = *pSrc++; - sum += ((q31_t) in * in); - - /* Decrement loop counter */ - blkCnt--; - } - - *pResult = sum; -} -#else -void arm_power_q15( - const q15_t * pSrc, - uint32_t blockSize, - q63_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q63_t sum = 0; /* Temporary result storage */ - q15_t in; /* Temporary variable to store input value */ - -#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) - q31_t in32; /* Temporary variable to store packed input value */ -#endif - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ -#if defined (ARM_MATH_DSP) - in32 = read_q15x2_ia (&pSrc); - sum = __SMLALD(in32, in32, sum); - - in32 = read_q15x2_ia (&pSrc); - sum = __SMLALD(in32, in32, sum); -#else - in = *pSrc++; - sum += ((q31_t) in * in); - - in = *pSrc++; - sum += ((q31_t) in * in); - - in = *pSrc++; - sum += ((q31_t) in * in); - - in = *pSrc++; - sum += ((q31_t) in * in); -#endif /* #if defined (ARM_MATH_DSP) */ - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ - in = *pSrc++; - sum += ((q31_t) in * in); - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store result in 34.30 format */ - *pResult = sum; -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of power group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_q15.c
+ * Description: Sum of the squares of the elements of a Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup power
+ @{
+ */
+
+/**
+ @brief Sum of the squares of the elements of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult sum of the squares value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.15 format.
+ Intermediate multiplication yields a 2.30 format, and this
+ result is added without saturation to a 64-bit accumulator in 34.30 format.
+ With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ full precision of the intermediate multiplication is preserved.
+ Finally, the return result is in 34.30 format.
+ */
+
+void arm_power_q15(
+ const q15_t * pSrc,
+ uint32_t blockSize,
+ q63_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q63_t sum = 0; /* Temporary result storage */
+ q15_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
+ q31_t in32; /* Temporary variable to store packed input value */
+#endif
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Compute Power and store result in a temporary variable, sum. */
+#if defined (ARM_MATH_DSP)
+ in32 = read_q15x2_ia ((q15_t **) &pSrc);
+ sum = __SMLALD(in32, in32, sum);
+
+ in32 = read_q15x2_ia ((q15_t **) &pSrc);
+ sum = __SMLALD(in32, in32, sum);
+#else
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Compute Power and store result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store result in 34.30 format */
+ *pResult = sum;
+}
+
+/**
+ @} end of power group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c index a39b3a7..aa51d0f 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c @@ -1,165 +1,121 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_power_q31.c - * Description: Sum of the squares of the elements of a Q31 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup power - @{ - */ - -/** - @brief Sum of the squares of the elements of a Q31 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult sum of the squares value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using a 64-bit internal accumulator. - The input is represented in 1.31 format. - Intermediate multiplication yields a 2.62 format, and this - result is truncated to 2.48 format by discarding the lower 14 bits. - The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format. - With 15 guard bits in the accumulator, there is no risk of overflow, and the - full precision of the intermediate multiplication is preserved. - Finally, the return result is in 16.48 format. - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_power_q31( - const q31_t * pSrc, - uint32_t blockSize, - q63_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - q31x4_t vecSrc; - q63_t sum = 0LL; - q31_t in; - - /* Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - while (blkCnt > 0U) - { - vecSrc = vldrwq_s32(pSrc); - /* - * sum lanes - */ - sum = vrmlaldavhaq(sum, vecSrc, vecSrc); - - blkCnt --; - pSrc += 4; - } - - /* - * tail - */ - blkCnt = blockSize & 0x3; - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ - in = *pSrc++; - sum += ((q63_t) in * in) >> 8; - - /* Decrement loop counter */ - blkCnt--; - } - - *pResult = asrl(sum, 6); -} -#else -void arm_power_q31( - const q31_t * pSrc, - uint32_t blockSize, - q63_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q63_t sum = 0; /* Temporary result storage */ - q31_t in; /* Temporary variable to store input value */ - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and store result in a temporary variable sum, providing 15 guard bits. */ - in = *pSrc++; - sum += ((q63_t) in * in) >> 14U; - - in = *pSrc++; - sum += ((q63_t) in * in) >> 14U; - - in = *pSrc++; - sum += ((q63_t) in * in) >> 14U; - - in = *pSrc++; - sum += ((q63_t) in * in) >> 14U; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ - in = *pSrc++; - sum += ((q63_t) in * in) >> 14U; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store results in 16.48 format */ - *pResult = sum; -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of power group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_q31.c
+ * Description: Sum of the squares of the elements of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup power
+ @{
+ */
+
+/**
+ @brief Sum of the squares of the elements of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult sum of the squares value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.31 format.
+ Intermediate multiplication yields a 2.62 format, and this
+ result is truncated to 2.48 format by discarding the lower 14 bits.
+ The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
+ With 15 guard bits in the accumulator, there is no risk of overflow, and the
+ full precision of the intermediate multiplication is preserved.
+ Finally, the return result is in 16.48 format.
+ */
+
+void arm_power_q31(
+ const q31_t * pSrc,
+ uint32_t blockSize,
+ q63_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q63_t sum = 0; /* Temporary result storage */
+ q31_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and store result in a temporary variable sum, providing 15 guard bits. */
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Compute Power and store result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store results in 16.48 format */
+ *pResult = sum;
+}
+
+/**
+ @} end of power group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c index 0545f7c..eefa8ba 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c @@ -1,180 +1,136 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_power_q7.c - * Description: Sum of the squares of the elements of a Q7 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup power - @{ - */ - -/** - @brief Sum of the squares of the elements of a Q7 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult sum of the squares value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using a 32-bit internal accumulator. - The input is represented in 1.7 format. - Intermediate multiplication yields a 2.14 format, and this - result is added without saturation to an accumulator in 18.14 format. - With 17 guard bits in the accumulator, there is no risk of overflow, and the - full precision of the intermediate multiplication is preserved. - Finally, the return result is in 18.14 format. - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_power_q7( - const q7_t * pSrc, - uint32_t blockSize, - q31_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - q7x16_t vecSrc; - q31_t sum = 0LL; - q7_t in; - - /* Compute 16 outputs at a time */ - blkCnt = blockSize >> 4U; - while (blkCnt > 0U) - { - vecSrc = vldrbq_s8(pSrc); - /* - * sum lanes - */ - sum = vmladavaq(sum, vecSrc, vecSrc); - - blkCnt--; - pSrc += 16; - } - - /* - * tail - */ - blkCnt = blockSize & 0xF; - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ - in = *pSrc++; - sum += ((q15_t) in * in); - - /* Decrement loop counter */ - blkCnt--; - } - - *pResult = sum; -} -#else -void arm_power_q7( - const q7_t * pSrc, - uint32_t blockSize, - q31_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q31_t sum = 0; /* Temporary result storage */ - q7_t in; /* Temporary variable to store input value */ - -#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) - q31_t in32; /* Temporary variable to store packed input value */ - q31_t in1, in2; /* Temporary variables to store input value */ -#endif - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ -#if defined (ARM_MATH_DSP) - in32 = read_q7x4_ia (&pSrc); - - in1 = __SXTB16(__ROR(in32, 8)); - in2 = __SXTB16(in32); - - /* calculate power and accumulate to accumulator */ - sum = __SMLAD(in1, in1, sum); - sum = __SMLAD(in2, in2, sum); -#else - in = *pSrc++; - sum += ((q15_t) in * in); - - in = *pSrc++; - sum += ((q15_t) in * in); - - in = *pSrc++; - sum += ((q15_t) in * in); - - in = *pSrc++; - sum += ((q15_t) in * in); -#endif /* #if defined (ARM_MATH_DSP) */ - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute Power and store result in a temporary variable, sum. */ - in = *pSrc++; - sum += ((q15_t) in * in); - - /* Decrement loop counter */ - blkCnt--; - } - - /* Store result in 18.14 format */ - *pResult = sum; -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of power group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_q7.c
+ * Description: Sum of the squares of the elements of a Q7 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup power
+ @{
+ */
+
+/**
+ @brief Sum of the squares of the elements of a Q7 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult sum of the squares value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 32-bit internal accumulator.
+ The input is represented in 1.7 format.
+ Intermediate multiplication yields a 2.14 format, and this
+ result is added without saturation to an accumulator in 18.14 format.
+ With 17 guard bits in the accumulator, there is no risk of overflow, and the
+ full precision of the intermediate multiplication is preserved.
+ Finally, the return result is in 18.14 format.
+ */
+
+void arm_power_q7(
+ const q7_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q31_t sum = 0; /* Temporary result storage */
+ q7_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
+ q31_t in32; /* Temporary variable to store packed input value */
+ q31_t in1, in2; /* Temporary variables to store input value */
+#endif
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Compute Power and store result in a temporary variable, sum. */
+#if defined (ARM_MATH_DSP)
+ in32 = read_q7x4_ia ((q7_t **) &pSrc);
+
+ in1 = __SXTB16(__ROR(in32, 8));
+ in2 = __SXTB16(in32);
+
+ /* calculate power and accumulate to accumulator */
+ sum = __SMLAD(in1, in1, sum);
+ sum = __SMLAD(in2, in2, sum);
+#else
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Compute Power and store result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Store result in 18.14 format */
+ *pResult = sum;
+}
+
+/**
+ @} end of power group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c index cb45752..4f316fb 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c @@ -1,192 +1,176 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rms_f32.c - * Description: Root mean square value of the elements of a floating-point vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @defgroup RMS Root mean square (RMS) - - Calculates the Root Mean Square of the elements in the input vector. - The underlying algorithm is used: - - <pre> - Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize)); - </pre> - - There are separate functions for floating point, Q31, and Q15 data types. - */ - -/** - @addtogroup RMS - @{ - */ - -/** - @brief Root Mean Square of the elements of a floating-point vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult root mean square value returned here - @return none - */ - -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_rms_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - float32_t pow = 0.0f; - - arm_power_f32(pSrc, blockSize, &pow); - - /* Compute Rms and store the result in the destination */ - arm_sqrt_f32(pow / (float32_t) blockSize, pResult); -} -#else -#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_rms_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - float32_t sum = 0.0f; /* accumulator */ - float32_t in; /* Temporary variable to store input value */ - uint32_t blkCnt; /* loop counter */ - - float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */ - float32x2_t sumV2; - float32x4_t inV; - - blkCnt = blockSize >> 2U; - - /* Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ - /* Compute Power and then store the result in a temporary variable, sum. */ - inV = vld1q_f32(pSrc); - sumV = vmlaq_f32(sumV, inV, inV); - pSrc += 4; - - /* Decrement the loop counter */ - blkCnt--; - } - - sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV)); - sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1); - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - blkCnt = blockSize % 0x4U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ - /* compute power and then store the result in a temporary variable, sum. */ - in = *pSrc++; - sum += in * in; - - /* Decrement the loop counter */ - blkCnt--; - } - - /* Compute Rms and store the result in the destination */ - arm_sqrt_f32(sum / (float32_t) blockSize, pResult); -} -#else -void arm_rms_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - float32_t sum = 0.0f; /* Temporary result storage */ - float32_t in; /* Temporary variable to store input value */ - -#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - in = *pSrc++; - /* Compute sum of squares and store result in a temporary variable, sum. */ - sum += in * in; - - in = *pSrc++; - sum += in * in; - - in = *pSrc++; - sum += in * in; - - in = *pSrc++; - sum += in * in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - in = *pSrc++; - /* Compute sum of squares and store result in a temporary variable. */ - sum += ( in * in); - - /* Decrement loop counter */ - blkCnt--; - } - - /* Compute Rms and store result in destination */ - arm_sqrt_f32(sum / (float32_t) blockSize, pResult); -} -#endif /* #if defined(ARM_MATH_NEON) */ -#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ - -/** - @} end of RMS group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rms_f32.c
+ * Description: Root mean square value of the elements of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup RMS Root mean square (RMS)
+
+ Calculates the Root Mean Square of the elements in the input vector.
+ The underlying algorithm is used:
+
+ <pre>
+ Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
+ </pre>
+
+ There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ @addtogroup RMS
+ @{
+ */
+
+/**
+ @brief Root Mean Square of the elements of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult root mean square value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON)
+void arm_rms_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* accumulator */
+ float32_t in; /* Temporary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+ float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
+ float32x2_t sumV2;
+ float32x4_t inV;
+
+ blkCnt = blockSize >> 2U;
+
+ /* Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ inV = vld1q_f32(pSrc);
+ sumV = vmlaq_f32(sumV, inV, inV);
+ pSrc += 4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
+ sum = sumV2[0] + sumV2[1];
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* compute power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Rms and store the result in the destination */
+ arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
+}
+#else
+void arm_rms_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ float32_t sum = 0.0f; /* Temporary result storage */
+ float32_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Compute sum of squares and store result in a temporary variable, sum. */
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Compute sum of squares and store result in a temporary variable. */
+ sum += ( in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Rms and store result in destination */
+ arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+
+/**
+ @} end of RMS group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c index 1df17b1..c8a0a13 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c @@ -1,149 +1,134 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rms_q15.c - * Description: Root Mean Square of the elements of a Q15 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup RMS - @{ - */ - -/** - @brief Root Mean Square of the elements of a Q15 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult root mean square value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using a 64-bit internal accumulator. - The input is represented in 1.15 format. - Intermediate multiplication yields a 2.30 format, and this - result is added without saturation to a 64-bit accumulator in 34.30 format. - With 33 guard bits in the accumulator, there is no risk of overflow, and the - full precision of the intermediate multiplication is preserved. - Finally, the 34.30 result is truncated to 34.15 format by discarding the lower - 15 bits, and then saturated to yield a result in 1.15 format. - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_rms_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult) -{ - q63_t pow = 0.0f; - q15_t normalizedPower; - - arm_power_q15(pSrc, blockSize, &pow); - - normalizedPower=__SSAT((pow / (q63_t) blockSize) >> 15,16); - arm_sqrt_q15(normalizedPower, pResult); -} -#else -void arm_rms_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q63_t sum = 0; /* Temporary result storage */ - q15_t in; /* Temporary variable to store input value */ - -#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) - q31_t in32; /* Temporary variable to store input value */ -#endif - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - /* Compute sum of squares and store result in a temporary variable. */ -#if defined (ARM_MATH_DSP) - in32 = read_q15x2_ia (&pSrc); - sum = __SMLALD(in32, in32, sum); - - in32 = read_q15x2_ia (&pSrc); - sum = __SMLALD(in32, in32, sum); -#else - in = *pSrc++; - sum += ((q31_t) in * in); - - in = *pSrc++; - sum += ((q31_t) in * in); - - in = *pSrc++; - sum += ((q31_t) in * in); - - in = *pSrc++; - sum += ((q31_t) in * in); -#endif /* #if defined (ARM_MATH_DSP) */ - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - in = *pSrc++; - /* Compute sum of squares and store result in a temporary variable. */ - sum += ((q31_t) in * in); - - /* Decrement loop counter */ - blkCnt--; - } - - /* Truncating and saturating the accumulator to 1.15 format */ - /* Store result in destination */ - arm_sqrt_q15(__SSAT((sum / (q63_t)blockSize) >> 15, 16), pResult); -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of RMS group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rms_q15.c
+ * Description: Root Mean Square of the elements of a Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup RMS
+ @{
+ */
+
+/**
+ @brief Root Mean Square of the elements of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult root mean square value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.15 format.
+ Intermediate multiplication yields a 2.30 format, and this
+ result is added without saturation to a 64-bit accumulator in 34.30 format.
+ With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ full precision of the intermediate multiplication is preserved.
+ Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ 15 bits, and then saturated to yield a result in 1.15 format.
+ */
+
+void arm_rms_q15(
+ const q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q63_t sum = 0; /* Temporary result storage */
+ q15_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
+ q31_t in32; /* Temporary variable to store input value */
+#endif
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ /* Compute sum of squares and store result in a temporary variable. */
+#if defined (ARM_MATH_DSP)
+ in32 = read_q15x2_ia ((q15_t **) &pSrc);
+ sum = __SMLALD(in32, in32, sum);
+
+ in32 = read_q15x2_ia ((q15_t **) &pSrc);
+ sum = __SMLALD(in32, in32, sum);
+#else
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Compute sum of squares and store result in a temporary variable. */
+ sum += ((q31_t) in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Truncating and saturating the accumulator to 1.15 format */
+ /* Store result in destination */
+ arm_sqrt_q15(__SSAT((sum / (q63_t)blockSize) >> 15, 16), pResult);
+}
+
+/**
+ @} end of RMS group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c index ba39b7b..4cf086f 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c @@ -1,141 +1,124 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rms_q31.c - * Description: Root Mean Square of the elements of a Q31 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup RMS - @{ - */ - -/** - @brief Root Mean Square of the elements of a Q31 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult root mean square value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using an internal 64-bit accumulator. - The input is represented in 1.31 format, and intermediate multiplication - yields a 2.62 format. - The accumulator maintains full precision of the intermediate multiplication results, - but provides only a single guard bit. - There is no saturation on intermediate additions. - If the accumulator overflows, it wraps around and distorts the result. - In order to avoid overflows completely, the input signal must be scaled down by - log2(blockSize) bits, as a total of blockSize additions are performed internally. - Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value. - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -void arm_rms_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult) -{ - q63_t pow = 0.0f; - q31_t normalizedPower; - arm_power_q31(pSrc, blockSize, &pow); - - normalizedPower=clip_q63_to_q31((pow / (q63_t) blockSize) >> 17); - arm_sqrt_q31(normalizedPower, pResult); - -} - -#else -void arm_rms_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - uint64_t sum = 0; /* Temporary result storage (can get never negative. changed type from q63 to uint64 */ - q31_t in; /* Temporary variable to store input value */ - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - in = *pSrc++; - /* Compute sum of squares and store result in a temporary variable, sum. */ - sum += ((q63_t) in * in); - - in = *pSrc++; - sum += ((q63_t) in * in); - - in = *pSrc++; - sum += ((q63_t) in * in); - - in = *pSrc++; - sum += ((q63_t) in * in); - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - - in = *pSrc++; - /* Compute sum of squares and store result in a temporary variable. */ - sum += ((q63_t) in * in); - - /* Decrement loop counter */ - blkCnt--; - } - - /* Convert data in 2.62 to 1.31 by 31 right shifts and saturate */ - /* Compute Rms and store result in destination vector */ - arm_sqrt_q31(clip_q63_to_q31((sum / (q63_t) blockSize) >> 31), pResult); -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of RMS group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rms_q31.c
+ * Description: Root Mean Square of the elements of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup RMS
+ @{
+ */
+
+/**
+ @brief Root Mean Square of the elements of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult root mean square value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using an internal 64-bit accumulator.
+ The input is represented in 1.31 format, and intermediate multiplication
+ yields a 2.62 format.
+ The accumulator maintains full precision of the intermediate multiplication results,
+ but provides only a single guard bit.
+ There is no saturation on intermediate additions.
+ If the accumulator overflows, it wraps around and distorts the result.
+ In order to avoid overflows completely, the input signal must be scaled down by
+ log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
+ */
+
+void arm_rms_q31(
+ const q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ uint64_t sum = 0; /* Temporary result storage (can get never negative. changed type from q63 to uint64 */
+ q31_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Compute sum of squares and store result in a temporary variable, sum. */
+ sum += ((q63_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in);
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Compute sum of squares and store result in a temporary variable. */
+ sum += ((q63_t) in * in);
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Convert data in 2.62 to 1.31 by 31 right shifts and saturate */
+ /* Compute Rms and store result in destination vector */
+ arm_sqrt_q31(clip_q63_to_q31((sum / (q63_t) blockSize) >> 31), pResult);
+}
+
+/**
+ @} end of RMS group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c index 682443d..214a794 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c @@ -1,83 +1,188 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_std_f32.c - * Description: Standard deviation of the elements of a floating-point vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @defgroup STD Standard deviation - - Calculates the standard deviation of the elements in the input vector. - - The float implementation is relying on arm_var_f32 which is using a two-pass algorithm - to avoid problem of numerical instabilities and cancellation errors. - - Fixed point versions are using the standard textbook algorithm since the fixed point - numerical behavior is different from the float one. - - Algorithm for fixed point versions is summarized below: - - - <pre> - Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1)) - - sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1] - sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1] - </pre> - - There are separate functions for floating point, Q31, and Q15 data types. - */ - -/** - @addtogroup STD - @{ - */ - -/** - @brief Standard deviation of the elements of a floating-point vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult standard deviation value returned here - @return none - */ -void arm_std_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - float32_t var; - arm_var_f32(pSrc,blockSize,&var); - arm_sqrt_f32(var, pResult); -} - -/** - @} end of STD group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_std_f32.c
+ * Description: Standard deviation of the elements of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup STD Standard deviation
+
+ Calculates the standard deviation of the elements in the input vector.
+ The underlying algorithm is used:
+
+ <pre>
+ Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))
+
+ sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
+ sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
+ </pre>
+
+ There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ @addtogroup STD
+ @{
+ */
+
+/**
+ @brief Standard deviation of the elements of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult standard deviation value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON_EXPERIMENTAL)
+void arm_std_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t var;
+ arm_var_f32(pSrc,blockSize,&var);
+ arm_sqrt_f32(var, pResult);
+}
+#else
+void arm_std_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ float32_t sum = 0.0f; /* Temporary result storage */
+ float32_t sumOfSquares = 0.0f; /* Sum of squares */
+ float32_t in; /* Temporary variable to store input value */
+
+#ifndef ARM_MATH_CM0_FAMILY
+ float32_t meanOfSquares, mean, squareOfMean; /* Temporary variables */
+#else
+ float32_t squareOfSum; /* Square of Sum */
+ float32_t var; /* Temporary varaince storage */
+#endif
+
+ if (blockSize <= 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+ sumOfSquares += in * in;
+ /* Compute sum and store result in a temporary variable, sum. */
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += in * in;
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += in * in;
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += in * in;
+ sum += in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+ sumOfSquares += ( in * in);
+ /* Compute sum and store result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+#ifndef ARM_MATH_CM0_FAMILY
+
+ /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
+ meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f);
+
+ /* Compute mean of all input values */
+ mean = sum / (float32_t) blockSize;
+
+ /* Compute square of mean */
+ squareOfMean = (mean * mean) * (((float32_t) blockSize) /
+ ((float32_t) blockSize - 1.0f));
+
+ /* Compute standard deviation and store result to destination */
+ arm_sqrt_f32((meanOfSquares - squareOfMean), pResult);
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Compute square of sum */
+ squareOfSum = ((sum * sum) / (float32_t) blockSize);
+
+ /* Compute variance */
+ var = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
+
+ /* Compute standard deviation and store result in destination */
+ arm_sqrt_f32(var, pResult);
+
+#endif /* #ifndef ARM_MATH_CM0_FAMILY */
+
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+
+/**
+ @} end of STD group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c index 88e273a..70d142d 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c @@ -1,173 +1,161 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_std_q15.c - * Description: Standard deviation of an array of Q15 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup STD - @{ - */ - -/** - @brief Standard deviation of the elements of a Q15 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult standard deviation value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using a 64-bit internal accumulator. - The input is represented in 1.15 format. - Intermediate multiplication yields a 2.30 format, and this - result is added without saturation to a 64-bit accumulator in 34.30 format. - With 33 guard bits in the accumulator, there is no risk of overflow, and the - full precision of the intermediate multiplication is preserved. - Finally, the 34.30 result is truncated to 34.15 format by discarding the lower - 15 bits, and then saturated to yield a result in 1.15 format. - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_std_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult) -{ - q15_t var=0; - - arm_var_q15(pSrc, blockSize, &var); - arm_sqrt_q15(var,pResult); -} -#else -void arm_std_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q31_t sum = 0; /* Accumulator */ - q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */ - q63_t sumOfSquares = 0; /* Sum of squares */ - q15_t in; /* Temporary variable to store input value */ - -#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) - q31_t in32; /* Temporary variable to store input value */ -#endif - - if (blockSize <= 1U) - { - *pResult = 0; - return; - } - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - /* C = A[0] + A[1] + ... + A[blockSize-1] */ - - /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ - /* Compute sum and store result in a temporary variable, sum. */ -#if defined (ARM_MATH_DSP) - in32 = read_q15x2_ia (&pSrc); - sumOfSquares = __SMLALD(in32, in32, sumOfSquares); - sum += ((in32 << 16U) >> 16U); - sum += (in32 >> 16U); - - in32 = read_q15x2_ia (&pSrc); - sumOfSquares = __SMLALD(in32, in32, sumOfSquares); - sum += ((in32 << 16U) >> 16U); - sum += (in32 >> 16U); -#else - in = *pSrc++; - sumOfSquares += (in * in); - sum += in; - - in = *pSrc++; - sumOfSquares += (in * in); - sum += in; - - in = *pSrc++; - sumOfSquares += (in * in); - sum += in; - - in = *pSrc++; - sumOfSquares += (in * in); - sum += in; -#endif /* #if defined (ARM_MATH_DSP) */ - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - /* C = A[0] + A[1] + ... + A[blockSize-1] */ - - in = *pSrc++; - /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ - sumOfSquares += (in * in); - /* Compute sum and store result in a temporary variable, sum. */ - sum += in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */ - meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U)); - - /* Compute square of mean */ - squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U))); - - /* mean of squares minus the square of mean. */ - /* Compute standard deviation and store result in destination */ - arm_sqrt_q15(__SSAT((meanOfSquares - squareOfMean) >> 15U, 16U), pResult); -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of STD group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_std_q15.c
+ * Description: Standard deviation of an array of Q15 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup STD
+ @{
+ */
+
+/**
+ @brief Standard deviation of the elements of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult standard deviation value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.15 format.
+ Intermediate multiplication yields a 2.30 format, and this
+ result is added without saturation to a 64-bit accumulator in 34.30 format.
+ With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ full precision of the intermediate multiplication is preserved.
+ Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ 15 bits, and then saturated to yield a result in 1.15 format.
+ */
+
+void arm_std_q15(
+ const q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q31_t sum = 0; /* Accumulator */
+ q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
+ q63_t sumOfSquares = 0; /* Sum of squares */
+ q15_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
+ q31_t in32; /* Temporary variable to store input value */
+#endif
+
+ if (blockSize <= 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+ /* Compute sum and store result in a temporary variable, sum. */
+#if defined (ARM_MATH_DSP)
+ in32 = read_q15x2_ia ((q15_t **) &pSrc);
+ sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
+ sum += ((in32 << 16U) >> 16U);
+ sum += (in32 >> 16U);
+
+ in32 = read_q15x2_ia ((q15_t **) &pSrc);
+ sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
+ sum += ((in32 << 16U) >> 16U);
+ sum += (in32 >> 16U);
+#else
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+ sum += in;
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+ sumOfSquares += (in * in);
+ /* Compute sum and store result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
+ meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U));
+
+ /* Compute square of mean */
+ squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
+
+ /* mean of squares minus the square of mean. */
+ /* Compute standard deviation and store result in destination */
+ arm_sqrt_q15(__SSAT((meanOfSquares - squareOfMean) >> 15U, 16U), pResult);
+}
+
+/**
+ @} end of STD group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c index 63170e7..1452a69 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c @@ -1,159 +1,147 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_std_q31.c - * Description: Standard deviation of the elements of a Q31 vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup STD - @{ - */ - -/** - @brief Standard deviation of the elements of a Q31 vector. - @param[in] pSrc points to the input vector. - @param[in] blockSize number of samples in input vector. - @param[out] pResult standard deviation value returned here. - @return none - - @par Scaling and Overflow Behavior - The function is implemented using an internal 64-bit accumulator. - The input is represented in 1.31 format, which is then downshifted by 8 bits - which yields 1.23, and intermediate multiplication yields a 2.46 format. - The accumulator maintains full precision of the intermediate multiplication results, - but provides only a 16 guard bits. - There is no saturation on intermediate additions. - If the accumulator overflows it wraps around and distorts the result. - In order to avoid overflows completely the input signal must be scaled down by - log2(blockSize)-8 bits, as a total of blockSize additions are performed internally. - After division, internal variables should be Q18.46 - Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value. - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_std_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult) -{ - q31_t var=0; - - arm_var_q31(pSrc, blockSize, &var); - arm_sqrt_q31(var, pResult); -} -#else -void arm_std_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q63_t sum = 0; /* Accumulator */ - q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */ - q63_t sumOfSquares = 0; /* Sum of squares */ - q31_t in; /* Temporary variable to store input value */ - - if (blockSize <= 1U) - { - *pResult = 0; - return; - } - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - /* C = A[0] + A[1] + ... + A[blockSize-1] */ - - in = *pSrc++ >> 8U; - /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ - sumOfSquares += ((q63_t) (in) * (in)); - /* Compute sum and store result in a temporary variable, sum. */ - sum += in; - - in = *pSrc++ >> 8U; - sumOfSquares += ((q63_t) (in) * (in)); - sum += in; - - in = *pSrc++ >> 8U; - sumOfSquares += ((q63_t) (in) * (in)); - sum += in; - - in = *pSrc++ >> 8U; - sumOfSquares += ((q63_t) (in) * (in)); - sum += in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - /* C = A[0] + A[1] + ... + A[blockSize-1] */ - - in = *pSrc++ >> 8U; - /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ - sumOfSquares += ((q63_t) (in) * (in)); - /* Compute sum and store result in a temporary variable, sum. */ - sum += in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */ - meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U)); - - /* Compute square of mean */ - squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U))); - - /* Compute standard deviation and store result in destination */ - arm_sqrt_q31((meanOfSquares - squareOfMean) >> 15U, pResult); -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of STD group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_std_q31.c
+ * Description: Standard deviation of the elements of a Q31 vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup STD
+ @{
+ */
+
+/**
+ @brief Standard deviation of the elements of a Q31 vector.
+ @param[in] pSrc points to the input vector.
+ @param[in] blockSize number of samples in input vector.
+ @param[out] pResult standard deviation value returned here.
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using an internal 64-bit accumulator.
+ The input is represented in 1.31 format, which is then downshifted by 8 bits
+ which yields 1.23, and intermediate multiplication yields a 2.46 format.
+ The accumulator maintains full precision of the intermediate multiplication results,
+ but provides only a 16 guard bits.
+ There is no saturation on intermediate additions.
+ If the accumulator overflows it wraps around and distorts the result.
+ In order to avoid overflows completely the input signal must be scaled down by
+ log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
+ After division, internal variables should be Q18.46
+ Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
+ */
+
+void arm_std_q31(
+ const q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q63_t sum = 0; /* Accumulator */
+ q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
+ q63_t sumOfSquares = 0; /* Sum of squares */
+ q31_t in; /* Temporary variable to store input value */
+
+ if (blockSize <= 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ in = *pSrc++ >> 8U;
+ /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+ sumOfSquares += ((q63_t) (in) * (in));
+ /* Compute sum and store result in a temporary variable, sum. */
+ sum += in;
+
+ in = *pSrc++ >> 8U;
+ sumOfSquares += ((q63_t) (in) * (in));
+ sum += in;
+
+ in = *pSrc++ >> 8U;
+ sumOfSquares += ((q63_t) (in) * (in));
+ sum += in;
+
+ in = *pSrc++ >> 8U;
+ sumOfSquares += ((q63_t) (in) * (in));
+ sum += in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ in = *pSrc++ >> 8U;
+ /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+ sumOfSquares += ((q63_t) (in) * (in));
+ /* Compute sum and store result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
+ meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U));
+
+ /* Compute square of mean */
+ squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
+
+ /* Compute standard deviation and store result in destination */
+ arm_sqrt_q31((meanOfSquares - squareOfMean) >> 15U, pResult);
+}
+
+/**
+ @} end of STD group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c index 7ff344c..18f5806 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c @@ -1,293 +1,234 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_var_f32.c - * Description: Variance of the elements of a floating-point vector - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @defgroup variance Variance - - Calculates the variance of the elements in the input vector. - The underlying algorithm used is the direct method sometimes referred to as the two-pass method: - - <pre> - Result = sum(element - meanOfElements)^2) / numElement - 1 - - meanOfElements = ( pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize - </pre> - - There are separate functions for floating point, Q31, and Q15 data types. - */ - -/** - @addtogroup variance - @{ - */ - -/** - @brief Variance of the elements of a floating-point vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult variance value returned here - @return none - */ -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" - -void arm_var_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - f32x4_t vecSrc; - f32x4_t sumVec = vdupq_n_f32(0.0f); - float32_t fMean; - float32_t sum = 0.0f; /* accumulator */ - float32_t in; /* Temporary variable to store input value */ - - if (blockSize <= 1U) { - *pResult = 0; - return; - } - - arm_mean_f32(pSrc, blockSize, &fMean); - - /* Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - while (blkCnt > 0U) - { - - vecSrc = vldrwq_f32(pSrc); - /* - * sum lanes - */ - vecSrc = vsubq(vecSrc, fMean); - sumVec = vfmaq(sumVec, vecSrc, vecSrc); - - blkCnt --; - pSrc += 4; - } - - sum = vecAddAcrossF32Mve(sumVec); - - /* - * tail - */ - blkCnt = blockSize & 0x3; - while (blkCnt > 0U) - { - in = *pSrc++ - fMean; - sum += in * in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Variance */ - *pResult = sum / (float32_t) (blockSize - 1); -} -#else -#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_var_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - float32_t mean; - - float32_t sum = 0.0f; /* accumulator */ - float32_t in; /* Temporary variable to store input value */ - uint32_t blkCnt; /* loop counter */ - - float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */ - float32x2_t sumV2; - float32x4_t inV; - float32x4_t avg; - - arm_mean_f32(pSrc,blockSize,&mean); - avg = vdupq_n_f32(mean); - - blkCnt = blockSize >> 2U; - - /* Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ - /* Compute Power and then store the result in a temporary variable, sum. */ - inV = vld1q_f32(pSrc); - inV = vsubq_f32(inV, avg); - sumV = vmlaq_f32(sumV, inV, inV); - pSrc += 4; - - /* Decrement the loop counter */ - blkCnt--; - } - - sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV)); - sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1); - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - blkCnt = blockSize % 0x4U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ - /* compute power and then store the result in a temporary variable, sum. */ - in = *pSrc++; - in = in - mean; - sum += in * in; - - /* Decrement the loop counter */ - blkCnt--; - } - - /* Variance */ - *pResult = sum / (float32_t)(blockSize - 1.0f); - -} - -#else -void arm_var_f32( - const float32_t * pSrc, - uint32_t blockSize, - float32_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - float32_t sum = 0.0f; /* Temporary result storage */ - float32_t fSum = 0.0f; - float32_t fMean, fValue; - const float32_t * pInput = pSrc; - - if (blockSize <= 1U) - { - *pResult = 0; - return; - } - -#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - - sum += *pInput++; - sum += *pInput++; - sum += *pInput++; - sum += *pInput++; - - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ - - sum += *pInput++; - - /* Decrement loop counter */ - blkCnt--; - } - - /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ - fMean = sum / (float32_t) blockSize; - - pInput = pSrc; - -#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - fValue = *pInput++ - fMean; - fSum += fValue * fValue; - - fValue = *pInput++ - fMean; - fSum += fValue * fValue; - - fValue = *pInput++ - fMean; - fSum += fValue * fValue; - - fValue = *pInput++ - fMean; - fSum += fValue * fValue; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - fValue = *pInput++ - fMean; - fSum += fValue * fValue; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Variance */ - *pResult = fSum / (float32_t)(blockSize - 1.0f); -} -#endif /* #if defined(ARM_MATH_NEON) */ -#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ - -/** - @} end of variance group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_var_f32.c
+ * Description: Variance of the elements of a floating-point vector
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @defgroup variance Variance
+
+ Calculates the variance of the elements in the input vector.
+ The underlying algorithm used is the direct method sometimes referred to as the two-pass method:
+
+ <pre>
+ Result = sum(element - meanOfElements)^2) / numElement - 1
+
+ meanOfElements = ( pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize
+ </pre>
+
+ There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ @addtogroup variance
+ @{
+ */
+
+/**
+ @brief Variance of the elements of a floating-point vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult variance value returned here
+ @return none
+ */
+#if defined(ARM_MATH_NEON_EXPERIMENTAL)
+void arm_var_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t mean;
+
+ float32_t sum = 0.0f; /* accumulator */
+ float32_t in; /* Temporary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+ float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */
+ float32x2_t sumV2;
+ float32x4_t inV;
+ float32x4_t avg;
+
+ arm_mean_f32(pSrc,blockSize,&mean);
+ avg = vdupq_n_f32(mean);
+
+ blkCnt = blockSize >> 2U;
+
+ /* Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ inV = vld1q_f32(pSrc);
+ inV = vsubq_f32(inV, avg);
+ sumV = vmlaq_f32(sumV, inV, inV);
+ pSrc += 4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
+ sum = sumV2[0] + sumV2[1];
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* compute power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ in = in - mean;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Variance */
+ *pResult = sum / (float32_t)(blockSize - 1.0f);
+
+}
+
+#else
+void arm_var_f32(
+ const float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ float32_t sum = 0.0f; /* Temporary result storage */
+ float32_t fSum = 0.0f;
+ float32_t fMean, fValue;
+ const float32_t * pInput = pSrc;
+
+ if (blockSize <= 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+
+ sum += *pInput++;
+ sum += *pInput++;
+ sum += *pInput++;
+ sum += *pInput++;
+
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+
+ sum += *pInput++;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ fMean = sum / (float32_t) blockSize;
+
+ pInput = pSrc;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ fValue = *pInput++ - fMean;
+ fSum += fValue * fValue;
+
+ fValue = *pInput++ - fMean;
+ fSum += fValue * fValue;
+
+ fValue = *pInput++ - fMean;
+ fSum += fValue * fValue;
+
+ fValue = *pInput++ - fMean;
+ fSum += fValue * fValue;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ fValue = *pInput++ - fMean;
+ fSum += fValue * fValue;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Variance */
+ *pResult = fSum / (float32_t)(blockSize - 1.0f);
+}
+#endif /* #if defined(ARM_MATH_NEON) */
+
+/**
+ @} end of variance group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c index f020c88..4218522 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c @@ -1,230 +1,164 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_var_q15.c - * Description: Variance of an array of Q15 type - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup variance - @{ - */ - -/** - @brief Variance of the elements of a Q15 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult variance value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using a 64-bit internal accumulator. - The input is represented in 1.15 format. - Intermediate multiplication yields a 2.30 format, and this - result is added without saturation to a 64-bit accumulator in 34.30 format. - With 33 guard bits in the accumulator, there is no risk of overflow, and the - full precision of the intermediate multiplication is preserved. - Finally, the 34.30 result is truncated to 34.15 format by discarding the lower - 15 bits, and then saturated to yield a result in 1.15 format. - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_var_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - q15x8_t vecSrc; - q63_t sumOfSquares = 0LL; - q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ - q63_t sum = 0LL; - q15_t in; - - if (blockSize <= 1U) { - *pResult = 0; - return; - } - - - blkCnt = blockSize >> 3; - while (blkCnt > 0U) - { - vecSrc = vldrhq_s16(pSrc); - /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ - /* Compute Sum of squares of the input samples - * and then store the result in a temporary variable, sumOfSquares. */ - - sumOfSquares = vmlaldavaq_s16(sumOfSquares, vecSrc, vecSrc); - sum = vaddvaq_s16(sum, vecSrc); - - blkCnt --; - pSrc += 8; - } - - /* Tail */ - blkCnt = blockSize & 7; - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - /* C = A[0] + A[1] + ... + A[blockSize-1] */ - - in = *pSrc++; - /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ -#if defined (ARM_MATH_DSP) - sumOfSquares = __SMLALD(in, in, sumOfSquares); -#else - sumOfSquares += (in * in); -#endif /* #if defined (ARM_MATH_DSP) */ - /* Compute sum and store result in a temporary variable, sum. */ - sum += in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Compute Mean of squares of the input samples - * and then store the result in a temporary variable, meanOfSquares. */ - meanOfSquares = arm_div_q63_to_q31(sumOfSquares, (blockSize - 1U)); - - /* Compute square of mean */ - squareOfMean = arm_div_q63_to_q31((q63_t)sum * sum, (q31_t)(blockSize * (blockSize - 1U))); - - /* mean of the squares minus the square of the mean. */ - *pResult = (meanOfSquares - squareOfMean) >> 15; -} -#else -void arm_var_q15( - const q15_t * pSrc, - uint32_t blockSize, - q15_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q31_t sum = 0; /* Accumulator */ - q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */ - q63_t sumOfSquares = 0; /* Sum of squares */ - q15_t in; /* Temporary variable to store input value */ - -#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) - q31_t in32; /* Temporary variable to store input value */ -#endif - - if (blockSize <= 1U) - { - *pResult = 0; - return; - } - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - /* C = A[0] + A[1] + ... + A[blockSize-1] */ - - /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ - /* Compute sum and store result in a temporary variable, sum. */ -#if defined (ARM_MATH_DSP) - in32 = read_q15x2_ia (&pSrc); - sumOfSquares = __SMLALD(in32, in32, sumOfSquares); - sum += ((in32 << 16U) >> 16U); - sum += (in32 >> 16U); - - in32 = read_q15x2_ia (&pSrc); - sumOfSquares = __SMLALD(in32, in32, sumOfSquares); - sum += ((in32 << 16U) >> 16U); - sum += (in32 >> 16U); -#else - in = *pSrc++; - sumOfSquares += (in * in); - sum += in; - - in = *pSrc++; - sumOfSquares += (in * in); - sum += in; - - in = *pSrc++; - sumOfSquares += (in * in); - sum += in; - - in = *pSrc++; - sumOfSquares += (in * in); - sum += in; -#endif /* #if defined (ARM_MATH_DSP) */ - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - /* C = A[0] + A[1] + ... + A[blockSize-1] */ - - in = *pSrc++; - /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ -#if defined (ARM_MATH_DSP) - sumOfSquares = __SMLALD(in, in, sumOfSquares); -#else - sumOfSquares += (in * in); -#endif /* #if defined (ARM_MATH_DSP) */ - /* Compute sum and store result in a temporary variable, sum. */ - sum += in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */ - meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U)); - - /* Compute square of mean */ - squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U))); - - /* mean of squares minus the square of mean. */ - *pResult = (meanOfSquares - squareOfMean) >> 15U; -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @} end of variance group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_var_q15.c
+ * Description: Variance of an array of Q15 type
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup variance
+ @{
+ */
+
+/**
+ @brief Variance of the elements of a Q15 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult variance value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using a 64-bit internal accumulator.
+ The input is represented in 1.15 format.
+ Intermediate multiplication yields a 2.30 format, and this
+ result is added without saturation to a 64-bit accumulator in 34.30 format.
+ With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ full precision of the intermediate multiplication is preserved.
+ Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ 15 bits, and then saturated to yield a result in 1.15 format.
+ */
+
+void arm_var_q15(
+ const q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q31_t sum = 0; /* Accumulator */
+ q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
+ q63_t sumOfSquares = 0; /* Sum of squares */
+ q15_t in; /* Temporary variable to store input value */
+
+#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
+ q31_t in32; /* Temporary variable to store input value */
+#endif
+
+ if (blockSize <= 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+ /* Compute sum and store result in a temporary variable, sum. */
+#if defined (ARM_MATH_DSP)
+ in32 = read_q15x2_ia ((q15_t **) &pSrc);
+ sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
+ sum += ((in32 << 16U) >> 16U);
+ sum += (in32 >> 16U);
+
+ in32 = read_q15x2_ia ((q15_t **) &pSrc);
+ sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
+ sum += ((in32 << 16U) >> 16U);
+ sum += (in32 >> 16U);
+#else
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+ sum += in;
+
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+ sum += in;
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ in = *pSrc++;
+ /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+#if defined (ARM_MATH_DSP)
+ sumOfSquares = __SMLALD(in, in, sumOfSquares);
+#else
+ sumOfSquares += (in * in);
+#endif /* #if defined (ARM_MATH_DSP) */
+ /* Compute sum and store result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
+ meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U));
+
+ /* Compute square of mean */
+ squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
+
+ /* mean of squares minus the square of mean. */
+ *pResult = (meanOfSquares - squareOfMean) >> 15U;
+}
+
+/**
+ @} end of variance group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c index 45b6b66..6b552db 100644 --- a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c @@ -1,214 +1,147 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_var_q31.c - * Description: Variance of an array of Q31 type - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/statistics_functions.h" - -/** - @ingroup groupStats - */ - -/** - @addtogroup variance - @{ - */ - -/** - @brief Variance of the elements of a Q31 vector. - @param[in] pSrc points to the input vector - @param[in] blockSize number of samples in input vector - @param[out] pResult variance value returned here - @return none - - @par Scaling and Overflow Behavior - The function is implemented using an internal 64-bit accumulator. - The input is represented in 1.31 format, which is then downshifted by 8 bits - which yields 1.23, and intermediate multiplication yields a 2.46 format. - The accumulator maintains full precision of the intermediate multiplication results, - and as a consequence has only 16 guard bits. - There is no saturation on intermediate additions. - If the accumulator overflows it wraps around and distorts the result. - In order to avoid overflows completely the input signal must be scaled down by - log2(blockSize)-8 bits, as a total of blockSize additions are performed internally. - After division, internal variables should be Q18.46 - Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value. - */ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) -void arm_var_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult) -{ - uint32_t blkCnt; /* loop counters */ - q31x4_t vecSrc; - q63_t sumOfSquares = 0LL; - q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ - q63_t sum = 0LL; - q31_t in; - - if (blockSize <= 1U) { - *pResult = 0; - return; - } - - - /* Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - while (blkCnt > 0U) - { - vecSrc = vldrwq_s32(pSrc); - /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ - /* Compute Sum of squares of the input samples - * and then store the result in a temporary variable, sumOfSquares. */ - - /* downscale */ - vecSrc = vshrq(vecSrc, 8); - sumOfSquares = vmlaldavaq(sumOfSquares, vecSrc, vecSrc); - sum = vaddlvaq(sum, vecSrc); - - blkCnt --; - pSrc += 4; - } - - - /* - * tail - */ - blkCnt = blockSize & 0x3; - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - /* C = A[0] + A[1] + ... + A[blockSize-1] */ - - in = *pSrc++ >> 8U; - /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ - sumOfSquares += ((q63_t) (in) * (in)); - /* Compute sum and store result in a temporary variable, sum. */ - sum += in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Compute Mean of squares of the input samples - * and then store the result in a temporary variable, meanOfSquares. */ - meanOfSquares = sumOfSquares / (q63_t) (blockSize - 1U); - - /* Compute square of mean */ - squareOfMean = sum * sum / (q63_t) (blockSize * (blockSize - 1U)); - - /* Compute standard deviation and then store the result to the destination */ - *pResult = asrl(meanOfSquares - squareOfMean, 15U); -} -#else -void arm_var_q31( - const q31_t * pSrc, - uint32_t blockSize, - q31_t * pResult) -{ - uint32_t blkCnt; /* Loop counter */ - q63_t sum = 0; /* Temporary result storage */ - q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */ - q63_t sumOfSquares = 0; /* Sum of squares */ - q31_t in; /* Temporary variable to store input value */ - - if (blockSize <= 1U) - { - *pResult = 0; - return; - } - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Loop unrolling: Compute 4 outputs at a time */ - blkCnt = blockSize >> 2U; - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - /* C = A[0] + A[1] + ... + A[blockSize-1] */ - - in = *pSrc++ >> 8U; - /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ - sumOfSquares += ((q63_t) (in) * (in)); - /* Compute sum and store result in a temporary variable, sum. */ - sum += in; - - in = *pSrc++ >> 8U; - sumOfSquares += ((q63_t) (in) * (in)); - sum += in; - - in = *pSrc++ >> 8U; - sumOfSquares += ((q63_t) (in) * (in)); - sum += in; - - in = *pSrc++ >> 8U; - sumOfSquares += ((q63_t) (in) * (in)); - sum += in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Loop unrolling: Compute remaining outputs */ - blkCnt = blockSize % 0x4U; - -#else - - /* Initialize blkCnt with number of samples */ - blkCnt = blockSize; - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - - while (blkCnt > 0U) - { - /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ - /* C = A[0] + A[1] + ... + A[blockSize-1] */ - - in = *pSrc++ >> 8U; - /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ - sumOfSquares += ((q63_t) (in) * (in)); - /* Compute sum and store result in a temporary variable, sum. */ - sum += in; - - /* Decrement loop counter */ - blkCnt--; - } - - /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */ - meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U)); - - /* Compute square of mean */ - squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U))); - - /* Compute variance and store result in destination */ - *pResult = (meanOfSquares - squareOfMean) >> 15U; -} -#endif -/** - @} end of variance group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_var_q31.c
+ * Description: Variance of an array of Q31 type
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupStats
+ */
+
+/**
+ @addtogroup variance
+ @{
+ */
+
+/**
+ @brief Variance of the elements of a Q31 vector.
+ @param[in] pSrc points to the input vector
+ @param[in] blockSize number of samples in input vector
+ @param[out] pResult variance value returned here
+ @return none
+
+ @par Scaling and Overflow Behavior
+ The function is implemented using an internal 64-bit accumulator.
+ The input is represented in 1.31 format, which is then downshifted by 8 bits
+ which yields 1.23, and intermediate multiplication yields a 2.46 format.
+ The accumulator maintains full precision of the intermediate multiplication results,
+ but provides only a 16 guard bits.
+ There is no saturation on intermediate additions.
+ If the accumulator overflows it wraps around and distorts the result.
+ In order to avoid overflows completely the input signal must be scaled down by
+ log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
+ After division, internal variables should be Q18.46
+ Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
+ */
+
+void arm_var_q31(
+ const q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ uint32_t blkCnt; /* Loop counter */
+ q63_t sum = 0; /* Temporary result storage */
+ q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
+ q63_t sumOfSquares = 0; /* Sum of squares */
+ q31_t in; /* Temporary variable to store input value */
+
+ if (blockSize <= 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Loop unrolling: Compute 4 outputs at a time */
+ blkCnt = blockSize >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ in = *pSrc++ >> 8U;
+ /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+ sumOfSquares += ((q63_t) (in) * (in));
+ /* Compute sum and store result in a temporary variable, sum. */
+ sum += in;
+
+ in = *pSrc++ >> 8U;
+ sumOfSquares += ((q63_t) (in) * (in));
+ sum += in;
+
+ in = *pSrc++ >> 8U;
+ sumOfSquares += ((q63_t) (in) * (in));
+ sum += in;
+
+ in = *pSrc++ >> 8U;
+ sumOfSquares += ((q63_t) (in) * (in));
+ sum += in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Loop unrolling: Compute remaining outputs */
+ blkCnt = blockSize % 0x4U;
+
+#else
+
+ /* Initialize blkCnt with number of samples */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* C = A[0] + A[1] + ... + A[blockSize-1] */
+
+ in = *pSrc++ >> 8U;
+ /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
+ sumOfSquares += ((q63_t) (in) * (in));
+ /* Compute sum and store result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
+ meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U));
+
+ /* Compute square of mean */
+ squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
+
+ /* Compute variance and store result in destination */
+ *pResult = (meanOfSquares - squareOfMean) >> 15U;
+}
+
+/**
+ @} end of variance group
+ */
|