diff options
Diffstat (limited to 'Drivers/CMSIS/DSP/Source/StatisticsFunctions')
93 files changed, 16455 insertions, 0 deletions
diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt new file mode 100644 index 0000000..480985f --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/CMakeLists.txt @@ -0,0 +1,117 @@ +cmake_minimum_required (VERSION 3.14) + +project(CMSISDSPStatistics) + +include(configLib) +include(configDsp) + + + +add_library(CMSISDSPStatistics STATIC) + +target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_q7.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_q7.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mean_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mean_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mean_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mean_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mean_q7.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_q7.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_q7.c) +target_sources(CMSISDSPStatistics PRIVATE arm_power_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_power_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_power_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_power_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_power_q7.c) +target_sources(CMSISDSPStatistics PRIVATE arm_rms_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_rms_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_rms_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_std_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_std_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_std_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_std_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_var_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_var_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_var_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_var_q31.c) + +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_q7.c) + +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_q7.c) + +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_q7.c) + +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f64.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_q7.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mse_q7.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mse_q15.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mse_q31.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mse_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mse_f32.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mse_f64.c) + +configLib(CMSISDSPStatistics ${ROOT}) +configDsp(CMSISDSPStatistics ${ROOT}) + +### Includes +target_include_directories(CMSISDSPStatistics PUBLIC "${DSP}/Include") + + + +if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16)) +target_sources(CMSISDSPStatistics PRIVATE arm_max_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_mean_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_power_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_rms_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_std_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_var_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_entropy_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_kullback_leibler_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_dot_prod_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_logsumexp_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_max_no_idx_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_min_no_idx_f16.c) + +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmax_no_idx_f16.c) +target_sources(CMSISDSPStatistics PRIVATE arm_absmin_no_idx_f16.c) +endif()
\ No newline at end of file diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c new file mode 100644 index 0000000..5446992 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c @@ -0,0 +1,100 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: StatisticsFunctions.c + * Description: Combination of all statistics function source files. + * + * $Date: 16. March 2020 + * $Revision: V1.1.0 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_entropy_f32.c" +#include "arm_entropy_f64.c" +#include "arm_kullback_leibler_f32.c" +#include "arm_kullback_leibler_f64.c" +#include "arm_logsumexp_dot_prod_f32.c" +#include "arm_logsumexp_f32.c" +#include "arm_max_f32.c" +#include "arm_max_f64.c" +#include "arm_max_q15.c" +#include "arm_max_q31.c" +#include "arm_max_q7.c" +#include "arm_max_no_idx_f32.c" +#include "arm_max_no_idx_f64.c" +#include "arm_max_no_idx_q31.c" +#include "arm_max_no_idx_q15.c" +#include "arm_max_no_idx_q7.c" +#include "arm_mean_f32.c" +#include "arm_mean_f64.c" +#include "arm_mean_q15.c" +#include "arm_mean_q31.c" +#include "arm_mean_q7.c" +#include "arm_min_f32.c" +#include "arm_min_f64.c" +#include "arm_min_q15.c" +#include "arm_min_q31.c" +#include "arm_min_q7.c" +#include "arm_min_no_idx_f32.c" +#include "arm_min_no_idx_f64.c" +#include "arm_min_no_idx_q31.c" +#include "arm_min_no_idx_q15.c" +#include "arm_min_no_idx_q7.c" +#include "arm_power_f32.c" +#include "arm_power_f64.c" +#include "arm_power_q15.c" +#include "arm_power_q31.c" +#include "arm_power_q7.c" +#include "arm_rms_f32.c" +#include "arm_rms_q15.c" +#include "arm_rms_q31.c" +#include "arm_std_f32.c" +#include "arm_std_f64.c" +#include "arm_std_q15.c" +#include "arm_std_q31.c" +#include "arm_var_f32.c" +#include "arm_var_f64.c" +#include "arm_var_q15.c" +#include "arm_var_q31.c" +#include "arm_absmax_f32.c" +#include "arm_absmax_f64.c" +#include "arm_absmax_q15.c" +#include "arm_absmax_q31.c" +#include "arm_absmax_q7.c" +#include "arm_absmin_f32.c" +#include "arm_absmin_f64.c" +#include "arm_absmin_q15.c" +#include "arm_absmin_q31.c" +#include "arm_absmin_q7.c" +#include "arm_absmax_no_idx_f32.c" +#include "arm_absmax_no_idx_f64.c" +#include "arm_absmax_no_idx_q15.c" +#include "arm_absmax_no_idx_q31.c" +#include "arm_absmax_no_idx_q7.c" +#include "arm_absmin_no_idx_f32.c" +#include "arm_absmin_no_idx_f64.c" +#include "arm_absmin_no_idx_q15.c" +#include "arm_absmin_no_idx_q31.c" +#include "arm_absmin_no_idx_q7.c" +#include "arm_mse_q7.c" +#include "arm_mse_q15.c" +#include "arm_mse_q31.c" +#include "arm_mse_f32.c" +#include "arm_mse_f64.c" diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c new file mode 100644 index 0000000..510746e --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c @@ -0,0 +1,46 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: StatisticsFunctions.c + * Description: Combination of all statistics function source files. + * + * $Date: 16. March 2020 + * $Revision: V1.1.0 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_max_f16.c" +#include "arm_min_f16.c" +#include "arm_mean_f16.c" +#include "arm_power_f16.c" +#include "arm_rms_f16.c" +#include "arm_std_f16.c" +#include "arm_var_f16.c" +#include "arm_entropy_f16.c" +#include "arm_kullback_leibler_f16.c" +#include "arm_logsumexp_dot_prod_f16.c" +#include "arm_logsumexp_f16.c" +#include "arm_max_no_idx_f16.c" +#include "arm_min_no_idx_f16.c" +#include "arm_absmax_f16.c" +#include "arm_absmin_f16.c" +#include "arm_absmax_no_idx_f16.c" +#include "arm_absmin_no_idx_f16.c" +#include "arm_mse_f16.c" diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c new file mode 100644 index 0000000..4241eb5 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f16.c @@ -0,0 +1,274 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_f16.c + * Description: Maximum value of a absolute values of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + +/** + @ingroup groupStats + */ + + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +void arm_absmax_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + uint16_t blkCnt; /* loop counters */ + f16x8_t vecSrc; + float16_t const *pSrcVec; + f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMIN); + float16_t maxValue = F16_ABSMIN; + uint16_t idx = blockSize; + uint16x8_t indexVec; + uint16x8_t curExtremIdxVec; + mve_pred16_t p0; + + + indexVec = vidupq_u16((uint32_t)0, 1); + curExtremIdxVec = vdupq_n_u16(0); + + pSrcVec = (float16_t const *) pSrc; + blkCnt = blockSize >> 3; + while (blkCnt > 0U) + { + vecSrc = vldrhq_f16(pSrcVec); + pSrcVec += 8; + vecSrc = vabsq(vecSrc); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = indexVec + 8; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 7; + if (blkCnt > 0U) + { + vecSrc = vldrhq_f16(pSrcVec); + pSrcVec += 8; + vecSrc = vabsq(vecSrc); + + p0 = vctp16q(blkCnt); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq_m(vecSrc, curExtremValVec, p0); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + } + /* + * Get max value across the vector + */ + maxValue = vmaxnmvq(maxValue, curExtremValVec); + /* + * set index for lower values to max possible index + */ + p0 = vcmpgeq(curExtremValVec, maxValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0); + /* + * Get min index which is thus for a max value + */ + idx = vminvq(idx, indexVec); + /* + * Save result + */ + *pIndex = idx; + *pResult = maxValue; +} +#else +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmax_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + float16_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out; \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \ + /* compare for the extrema value */ \ + if ((_Float16)cur_absmax > (_Float16)out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \ + if ((_Float16)cur_absmax > (_Float16)out) \ + { \ + out = cur_absmax; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \ + if ((_Float16)cur_absmax > (_Float16)out) \ + { \ + out = cur_absmax; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \ + if ((_Float16)cur_absmax > (_Float16)out) \ + { \ + out = cur_absmax; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \ + if ((_Float16)cur_absmax > (_Float16)out) \ + { \ + out = cur_absmax; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else +void arm_absmax_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + float16_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = (_Float16)fabsf((float32_t)*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = (_Float16)fabsf((float32_t)*pSrc++); + + /* compare for the maximum value */ + if ((_Float16)out < (_Float16)maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_LOOPUNROLL) */ +#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMax group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f32.c new file mode 100644 index 0000000..a571ae0 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f32.c @@ -0,0 +1,260 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_f32.c + * Description: Maximum value of absolute values of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + +/** + @ingroup groupStats + */ + +/** + @defgroup AbsMax Absolute Maximum + + Computes the maximum value of absolute values of an array of data. + The function returns both the maximum value and its position within the array. + There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_absmax_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + int32_t blkSize = blockSize; + f32x4_t vecSrc; + f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMIN); + float32_t maxValue = F32_ABSMIN; + uint32_t idx = blockSize; + uint32x4_t indexVec; + uint32x4_t curExtremIdxVec; + uint32_t curIdx = 0; + mve_pred16_t p0; + + + indexVec = vidupq_wb_u32(&curIdx, 1); + curExtremIdxVec = vdupq_n_u32(0); + + do { + mve_pred16_t p = vctp32q(blkSize); + + vecSrc = vldrwq_z_f32((float32_t const *) pSrc, p); + vecSrc = vabsq_m(vuninitializedq_f32(), vecSrc, p); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq_m(vecSrc, curExtremValVec, p); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + /* Does TP detection works here ?? */ + indexVec = vidupq_wb_u32(&curIdx, 1); + + blkSize -= 4; + pSrc += 4; + } + while (blkSize > 0); + + /* + * Get max value across the vector + */ + maxValue = vmaxnmvq(maxValue, curExtremValVec); + /* + * set index for lower values to max possible index + */ + p0 = vcmpgeq(curExtremValVec, maxValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0); + /* + * Get min index which is thus for a max value + */ + idx = vminvq(idx, indexVec); + /* + * Save result + */ + *pIndex = idx; + *pResult = maxValue; +} + + +#else +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmax_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + float32_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0.0f) ? out : -out; \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else +void arm_absmax_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + float32_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = fabsf(*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = fabsf(*pSrc++); + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_LOOPUNROLL) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMax group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f64.c new file mode 100644 index 0000000..987c32c --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_f64.c @@ -0,0 +1,92 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_f64.c + * Description: Maximum value of absolute values of a floating-point vector + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ +void arm_absmax_f64( + const float64_t * pSrc, + uint32_t blockSize, + float64_t * pResult, + uint32_t * pIndex) +{ + float64_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = fabs(*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = fabs(*pSrc++); + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + @} end of AbsMax group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f16.c new file mode 100644 index 0000000..82820c6 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f16.c @@ -0,0 +1,228 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_no_idx_f16.c + * Description: Maximum value of a absolute values of a floating-point vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + +/** + @ingroup groupStats + */ + + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +void arm_absmax_no_idx_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + uint16_t blkCnt; /* loop counters */ + f16x8_t vecSrc; + float16_t const *pSrcVec; + f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMIN); + float16_t maxValue = F16_ABSMIN; + mve_pred16_t p0; + + + pSrcVec = (float16_t const *) pSrc; + blkCnt = blockSize >> 3; + while (blkCnt > 0) + { + vecSrc = vldrhq_f16(pSrcVec); + pSrcVec += 8; + /* + * update per-lane max. + */ + curExtremValVec = vmaxnmaq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 7; + if (blkCnt > 0U) + { + vecSrc = vldrhq_f16(pSrcVec); + pSrcVec += 8; + p0 = vctp16q(blkCnt); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + curExtremValVec = vmaxnmaq_m(curExtremValVec, vecSrc, p0); + } + /* + * Get max value across the vector + */ + maxValue = vmaxnmavq(maxValue, curExtremValVec); + *pResult = maxValue; +} +#else +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmax_no_idx_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + float16_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt; /* Loop counter */ \ + \ + \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out; \ + \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \ + /* compare for the extrema value */ \ + if ((_Float16)cur_absmax > (_Float16)out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \ + if ((_Float16)cur_absmax > (_Float16)out) \ + { \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \ + if ((_Float16)cur_absmax > (_Float16)out) \ + { \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \ + if ((_Float16)cur_absmax > (_Float16)out) \ + { \ + out = cur_absmax; \ + } \ + \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax; \ + if ((_Float16)cur_absmax > (_Float16)out) \ + { \ + out = cur_absmax; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ +} +#else +void arm_absmax_no_idx_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + float16_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + + /* Load first input value that act as reference value for comparision */ + out = (_Float16)fabsf((float32_t)*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = (_Float16)fabsf((float32_t)*pSrc++); + + /* compare for the maximum value */ + if ((_Float16)out < (_Float16)maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; +} +#endif /* defined(ARM_MATH_LOOPUNROLL) */ +#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMax group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f32.c new file mode 100644 index 0000000..13d99ba --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f32.c @@ -0,0 +1,225 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_no_idx_f32.c + * Description: Maximum value of absolute values of a floating-point vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + +/** + @ingroup groupStats + */ + + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_absmax_no_idx_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + f32x4_t vecSrc; + float32_t const *pSrcVec; + f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMIN); + float32_t maxValue = F32_ABSMIN; + mve_pred16_t p0; + + + pSrcVec = (float32_t const *) pSrc; + blkCnt = blockSize >> 2; + while (blkCnt > 0) + { + vecSrc = vldrwq_f32(pSrcVec); + pSrcVec += 4; + /* + * update per-lane max. + */ + curExtremValVec = vmaxnmaq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 3; + if (blkCnt > 0) + { + vecSrc = vldrwq_f32(pSrcVec); + pSrcVec += 4; + p0 = vctp32q(blkCnt); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + curExtremValVec = vmaxnmaq_m(curExtremValVec, vecSrc, p0); + } + /* + * Get max value across the vector + */ + maxValue = vmaxnmavq(maxValue, curExtremValVec); + *pResult = maxValue; +} + + +#else +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmax_no_idx_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt; /* Loop counter */ \ + \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0.0f) ? out : -out; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax; \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ +} +#else +void arm_absmax_no_idx_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + + + + /* Load first input value that act as reference value for comparision */ + out = fabsf(*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = fabsf(*pSrc++); + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; +} +#endif /* defined(ARM_MATH_LOOPUNROLL) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMax group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f64.c new file mode 100644 index 0000000..23e3400 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_f64.c @@ -0,0 +1,87 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_no_idx_f64.c + * Description: Maximum value of absolute values of a floating-point vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ +void arm_absmax_no_idx_f64( + const float64_t * pSrc, + uint32_t blockSize, + float64_t * pResult) +{ + float64_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + + + + /* Load first input value that act as reference value for comparision */ + out = fabs(*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = fabs(*pSrc++); + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; +} + +/** + @} end of AbsMax group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q15.c new file mode 100644 index 0000000..941f2dd --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q15.c @@ -0,0 +1,220 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_no_idx_q15.c + * Description: Maximum value of absolute values of a Q15 vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_absmax_no_idx_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + uint16_t blkCnt; /* loop counters */ + q15x8_t vecSrc; + q15_t const *pSrcVec; + uint16x8_t curExtremValVec = vdupq_n_s16(Q15_ABSMIN); + q15_t maxValue = Q15_ABSMIN; + mve_pred16_t p0; + + + pSrcVec = (q15_t const *) pSrc; + blkCnt = blockSize >> 3; + while (blkCnt > 0U) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + /* + * update per-lane max. + */ + curExtremValVec = vmaxaq(curExtremValVec, vecSrc); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 7; + if (blkCnt > 0U) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + p0 = vctp16q(blkCnt); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0); + } + /* + * Get max value across the vector + */ + maxValue = vmaxavq(maxValue, (q15x8_t)curExtremValVec); + *pResult = maxValue; +} + +#else +#if defined(ARM_MATH_DSP) +void arm_absmax_no_idx_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q15_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt; /* Loop counter */ \ + \ + \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q15_t)__QSUB16(0, out); \ + \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ +} +#else +void arm_absmax_no_idx_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q15_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc); + pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMax group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q31.c new file mode 100644 index 0000000..8275a53 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q31.c @@ -0,0 +1,220 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_no_idx_q31.c + * Description: Maximum value of absolute values of a Q31 vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a Q31 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +void arm_absmax_no_idx_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + q31x4_t vecSrc; + q31_t const *pSrcVec; + uint32x4_t curExtremValVec = vdupq_n_s32(Q31_ABSMIN); + q31_t maxValue = Q31_ABSMIN; + mve_pred16_t p0; + + + pSrcVec = (q31_t const *) pSrc; + blkCnt = blockSize >> 2; + while (blkCnt > 0) + { + vecSrc = vldrwq_s32(pSrcVec); + pSrcVec += 4; + /* + * update per-lane max. + */ + curExtremValVec = vmaxaq(curExtremValVec, vecSrc); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 3; + if (blkCnt > 0) + { + vecSrc = vldrwq_s32(pSrcVec); + pSrcVec += 4; + p0 = vctp32q(blkCnt); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0); + } + /* + * Get max value across the vector + */ + maxValue = vmaxavq(maxValue, (q31x4_t)curExtremValVec); + *pResult = maxValue; +} +#else +#if defined(ARM_MATH_DSP) +void arm_absmax_no_idx_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q31_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt; /* Loop counter */ \ + \ + \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q31_t)__QSUB(0, out); \ + \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ +} +#else +void arm_absmax_no_idx_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q31_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc); + pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMax group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q7.c new file mode 100644 index 0000000..8eaca9b --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_no_idx_q7.c @@ -0,0 +1,224 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_no_idx_q7.c + * Description: Maximum value of absolute values of a Q7 vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a Q7 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include <stdint.h> +#include "arm_helium_utils.h" + + + +void arm_absmax_no_idx_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + q7x16_t vecSrc; + q7_t const *pSrcVec; + uint8x16_t curExtremValVec = vdupq_n_s8(Q7_ABSMIN); + q7_t maxValue = Q7_ABSMIN; + mve_pred16_t p0; + + + pSrcVec = (q7_t const *) pSrc; + blkCnt = blockSize >> 4; + while (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 16; + /* + * update per-lane max. + */ + curExtremValVec = vmaxaq(curExtremValVec, vecSrc); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 0xF; + if (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 16; + p0 = vctp8q(blkCnt); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0); + } + /* + * Get max value across the vector + */ + maxValue = vmaxavq(maxValue, (q7x16_t)curExtremValVec); + *pResult = maxValue; +} +#else +#if defined(ARM_MATH_DSP) +void arm_absmax_no_idx_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + q7_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt; /* Loop counter */ \ + \ + \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \ + \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ +} +#else +void arm_absmax_no_idx_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + q7_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc); + pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMax group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q15.c new file mode 100644 index 0000000..7a7816d --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q15.c @@ -0,0 +1,236 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_q15.c + * Description: Maximum value of absolute values of a Q15 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_absmax_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + int32_t blkCnt; /* loop counters */ + q15x8_t extremValVec = vdupq_n_s16(Q15_ABSMIN); + q15_t maxValue = Q15_ABSMIN; + uint16x8_t indexVec; + uint16x8_t extremIdxVec; + mve_pred16_t p0; + uint16_t extremIdxArr[8]; + + indexVec = vidupq_u16(0U, 1); + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp16q(blkCnt); + q15x8_t extremIdxVal = vld1q_z_s16(pSrc, p); + + extremIdxVal = vqabsq(extremIdxVal); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); + + extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); + /* store per-lane extrema indexes */ + vst1q_p_u16(extremIdxArr, indexVec, p0); + + indexVec += 8; + pSrc += 8; + blkCnt -= 8; + } + while (blkCnt > 0); + + + /* Get max value across the vector */ + maxValue = vmaxvq(maxValue, extremValVec); + + /* set index for lower values to max possible index */ + p0 = vcmpgeq(extremValVec, maxValue); + extremIdxVec = vld1q_u16(extremIdxArr); + + indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0); + *pIndex = vminvq(blockSize - 1, indexVec); + *pResult = maxValue; +} + +#else +#if defined(ARM_MATH_DSP) +void arm_absmax_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + q15_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q15_t)__QSUB16(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q15_t)__QSUB16(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else +void arm_absmax_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + q15_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc); + pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMax group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q31.c new file mode 100644 index 0000000..99b38f3 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q31.c @@ -0,0 +1,236 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_q31.c + * Description: Maximum value of absolute values of a Q31 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a Q31 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +void arm_absmax_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + int32_t blkCnt; /* loop counters */ + q31x4_t extremValVec = vdupq_n_s32(Q31_ABSMIN); + q31_t maxValue = Q31_ABSMIN; + uint32x4_t indexVec; + uint32x4_t extremIdxVec; + mve_pred16_t p0; + uint32_t extremIdxArr[4]; + + indexVec = vidupq_u32(0U, 1); + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp32q(blkCnt); + q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p); + + extremIdxVal = vqabsq(extremIdxVal); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); + + extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); + /* store per-lane extrema indexes */ + vst1q_p_u32(extremIdxArr, indexVec, p0); + + indexVec += 4; + pSrc += 4; + blkCnt -= 4; + } + while (blkCnt > 0); + + + /* Get max value across the vector */ + maxValue = vmaxvq(maxValue, extremValVec); + + /* set index for lower values to max possible index */ + p0 = vcmpgeq(extremValVec, maxValue); + extremIdxVec = vld1q_u32(extremIdxArr); + + indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0); + *pIndex = vminvq(blockSize - 1, indexVec); + *pResult = maxValue; +} +#else +#if defined(ARM_MATH_DSP) +void arm_absmax_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + q31_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q31_t)__QSUB(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q31_t)__QSUB(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else +void arm_absmax_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + q31_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc); + pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMax group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q7.c new file mode 100644 index 0000000..1259667 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmax_q7.c @@ -0,0 +1,294 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmax_q7.c + * Description: Maximum value of absolute values of a Q7 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMax + @{ + */ + +/** + @brief Maximum value of absolute values of a Q7 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include <stdint.h> +#include "arm_helium_utils.h" + +#define MAX_BLKSZ_S8 (UINT8_MAX+1) + +static void arm_small_blk_absmax_q7( + const q7_t * pSrc, + uint16_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + int32_t blkCnt; /* loop counters */ + q7x16_t extremValVec = vdupq_n_s8(Q7_ABSMIN); + q7_t maxValue = Q7_ABSMIN; + uint8x16_t indexVec; + uint8x16_t extremIdxVec; + mve_pred16_t p0; + uint8_t extremIdxArr[16]; + + indexVec = vidupq_u8(0U, 1); + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp8q(blkCnt); + q7x16_t extremIdxVal = vld1q_z_s8(pSrc, p); + + extremIdxVal = vqabsq(extremIdxVal); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); + + extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); + /* store per-lane extrema indexes */ + vst1q_p_u8(extremIdxArr, indexVec, p0); + + indexVec += 16; + pSrc += 16; + blkCnt -= 16; + } + while (blkCnt > 0); + + + /* Get max value across the vector */ + maxValue = vmaxvq(maxValue, extremValVec); + + /* set index for lower values to max possible index */ + p0 = vcmpgeq(extremValVec, maxValue); + extremIdxVec = vld1q_u8(extremIdxArr); + + indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0); + *pIndex = vminvq_u8(blockSize - 1, indexVec); + *pResult = maxValue; +} + +void arm_absmax_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + int32_t totalSize = blockSize; + + if (totalSize <= MAX_BLKSZ_S8) + { + arm_small_blk_absmax_q7(pSrc, blockSize, pResult, pIndex); + } + else + { + uint32_t curIdx = 0; + q7_t curBlkExtr = Q7_MIN; + uint32_t curBlkPos = 0; + uint32_t curBlkIdx = 0; + /* + * process blocks of 255 elts + */ + while (totalSize >= MAX_BLKSZ_S8) + { + const q7_t *curSrc = pSrc; + + arm_small_blk_absmax_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex); + if (*pResult > curBlkExtr) + { + /* + * update partial extrema + */ + curBlkExtr = *pResult; + curBlkPos = *pIndex; + curBlkIdx = curIdx; + } + curIdx++; + pSrc += MAX_BLKSZ_S8; + totalSize -= MAX_BLKSZ_S8; + } + /* + * remainder + */ + arm_small_blk_absmax_q7(pSrc, totalSize, pResult, pIndex); + if (*pResult > curBlkExtr) + { + curBlkExtr = *pResult; + curBlkPos = *pIndex; + curBlkIdx = curIdx; + } + *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos; + *pResult = curBlkExtr; + } +} +#else +#if defined(ARM_MATH_DSP) +void arm_absmax_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + q7_t cur_absmax, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmax to next consecutive values one by one */ \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + /* compare for the extrema value */ \ + if (cur_absmax > out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmax; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmax = *pSrc++; \ + cur_absmax = (cur_absmax > 0) ? cur_absmax : (q7_t)__QSUB8(0, cur_absmax); \ + if (cur_absmax > out) \ + { \ + out = cur_absmax; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else +void arm_absmax_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + q7_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc); + pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMax group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f16.c new file mode 100644 index 0000000..6d2ff50 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f16.c @@ -0,0 +1,276 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_f16.c + * Description: Minimum value of absolute values of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +void arm_absmin_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + uint16_t blkCnt; /* loop counters */ + f16x8_t vecSrc; + float16_t const *pSrcVec; + f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMAX); + float16_t minValue = F16_ABSMAX; + uint16_t idx = blockSize; + uint16x8_t indexVec; + uint16x8_t curExtremIdxVec; + mve_pred16_t p0; + + + indexVec = vidupq_u16((uint32_t)0, 1); + curExtremIdxVec = vdupq_n_u16(0); + + pSrcVec = (float16_t const *) pSrc; + blkCnt = blockSize >> 3; + while (blkCnt > 0U) + { + vecSrc = vldrhq_f16(pSrcVec); + pSrcVec += 8; + vecSrc = vabsq(vecSrc); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpleq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = indexVec + 8; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 7; + if (blkCnt > 0U) + { + p0 = vctp16q(blkCnt); + + vecSrc = vldrhq_f16(pSrcVec); + pSrcVec += 8; + vecSrc = vabsq(vecSrc); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpleq_m(vecSrc, curExtremValVec, p0); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + } + /* + * Get min value across the vector + */ + minValue = vminnmvq(minValue, curExtremValVec); + /* + * set index for lower values to max possible index + */ + p0 = vcmpleq(curExtremValVec, minValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0); + /* + * Get min index which is thus for a max value + */ + idx = vminvq(idx, indexVec); + /* + * Save result + */ + *pIndex = idx; + *pResult = minValue; +} + +#else +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmin_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + float16_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out; \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \ + /* compare for the extrema value */ \ + if ((_Float16)cur_absmin < (_Float16)out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \ + if ((_Float16)cur_absmin < (_Float16)out) \ + { \ + out = cur_absmin; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \ + if ((_Float16)cur_absmin < (_Float16)out) \ + { \ + out = cur_absmin; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \ + if ((_Float16)cur_absmin < (_Float16)out) \ + { \ + out = cur_absmin; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \ + if ((_Float16)cur_absmin < (_Float16)out) \ + { \ + out = cur_absmin; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else +void arm_absmin_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + float16_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = (_Float16)fabsf((float32_t)*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = (_Float16)fabsf((float32_t)*pSrc++); + + /* compare for the minimum value */ + if ((_Float16)out > (_Float16)minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_LOOPUNROLL) */ +#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMin group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f32.c new file mode 100644 index 0000000..3a96322 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f32.c @@ -0,0 +1,279 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_f32.c + * Description: Minimum value of absolute values of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + + +/** + @ingroup groupStats + */ + +/** + @defgroup AbsMin Absolute Minimum + + Computes the minimum value of absolute values of an array of data. + The function returns both the minimum value and its position within the array. + There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +void arm_absmin_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + int32_t blkCnt; /* loop counters */ + f32x4_t vecSrc; + float32_t const *pSrcVec; + f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMAX); + float32_t minValue = F32_ABSMAX; + uint32_t idx = blockSize; + uint32x4_t indexVec; + uint32x4_t curExtremIdxVec; + mve_pred16_t p0; + + + indexVec = vidupq_u32((uint32_t)0, 1); + curExtremIdxVec = vdupq_n_u32(0); + + pSrcVec = (float32_t const *) pSrc; + blkCnt = blockSize >> 2; + while (blkCnt > 0) + { + vecSrc = vldrwq_f32(pSrcVec); + pSrcVec += 4; + vecSrc = vabsq(vecSrc); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpleq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = indexVec + 4; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 3; + if (blkCnt > 0) + { + p0 = vctp32q(blkCnt); + + vecSrc = vldrwq_f32(pSrcVec); + pSrcVec += 4; + vecSrc = vabsq(vecSrc); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpleq_m(vecSrc, curExtremValVec, p0); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + } + /* + * Get min value across the vector + */ + minValue = vminnmvq(minValue, curExtremValVec); + /* + * set index for lower values to max possible index + */ + p0 = vcmpleq(curExtremValVec, minValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0); + /* + * Get min index which is thus for a max value + */ + idx = vminvq(idx, indexVec); + /* + * Save result + */ + *pIndex = idx; + *pResult = minValue; +} + +#else +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmin_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + float32_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0.0f) ? out : -out; \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else +void arm_absmin_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + float32_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = fabsf(*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = fabsf(*pSrc++); + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +#endif /* defined(ARM_MATH_LOOPUNROLL) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMin group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f64.c new file mode 100644 index 0000000..7aac970 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_f64.c @@ -0,0 +1,90 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_f64.c + * Description: Minimum value of absolute values of a floating-point vector + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ +void arm_absmin_f64( + const float64_t * pSrc, + uint32_t blockSize, + float64_t * pResult, + uint32_t * pIndex) +{ + float64_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = fabs(*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = fabs(*pSrc++); + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + @} end of AbsMin group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f16.c new file mode 100644 index 0000000..62e1054 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f16.c @@ -0,0 +1,230 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_no_idx_f16.c + * Description: Minimum value of absolute values of a floating-point vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +void arm_absmin_no_idx_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + f16x8_t vecSrc; + float16_t const *pSrcVec; + f16x8_t curExtremValVec = vdupq_n_f16(F16_ABSMAX); + float16_t minValue = F16_ABSMAX; + mve_pred16_t p0; + + + pSrcVec = (float16_t const *) pSrc; + blkCnt = blockSize >> 3; + while (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + /* + * update per-lane min. + */ + curExtremValVec = vminnmaq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 7; + if (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + p0 = vctp16q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + curExtremValVec = vminnmaq_m(curExtremValVec, vecSrc, p0); + } + /* + * Get min value across the vector + */ + minValue = vminnmavq(minValue, curExtremValVec); + *pResult = minValue; +} + +#else +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmin_no_idx_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + float16_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt; /* Loop counter */ \ + \ + \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out; \ + \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \ + /* compare for the extrema value */ \ + if ((_Float16)cur_absmin < (_Float16)out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \ + if ((_Float16)cur_absmin < (_Float16)out) \ + { \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \ + if ((_Float16)cur_absmin < (_Float16)out) \ + { \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \ + if ((_Float16)cur_absmin < (_Float16)out) \ + { \ + out = cur_absmin; \ + } \ + \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = ((_Float16)cur_absmin > 0.0f16) ? cur_absmin : -(_Float16)cur_absmin; \ + if ((_Float16)cur_absmin < (_Float16)out) \ + { \ + out = cur_absmin; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ +} +#else +void arm_absmin_no_idx_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + float16_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + + /* Load first input value that act as reference value for comparision */ + out = (_Float16)fabsf((float32_t)*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = (_Float16)fabsf((float32_t)*pSrc++); + + /* compare for the minimum value */ + if ((_Float16)out > (_Float16)minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; +} +#endif /* defined(ARM_MATH_LOOPUNROLL) */ +#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMin group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f32.c new file mode 100644 index 0000000..c7108da --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f32.c @@ -0,0 +1,226 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_no_idx_f32.c + * Description: Minimum value of absolute values of a floating-point vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + + +/** + @ingroup groupStats + */ + + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +void arm_absmin_no_idx_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + f32x4_t vecSrc; + float32_t const *pSrcVec; + f32x4_t curExtremValVec = vdupq_n_f32(F32_ABSMAX); + float32_t minValue = F32_ABSMAX; + mve_pred16_t p0; + + + pSrcVec = (float32_t const *) pSrc; + blkCnt = blockSize >> 2; + while (blkCnt > 0) + { + vecSrc = vldrwq_f32(pSrcVec); + pSrcVec += 4; + /* + * update per-lane min. + */ + curExtremValVec = vminnmaq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 3; + if (blkCnt > 0) + { + vecSrc = vldrwq_f32(pSrcVec); + pSrcVec += 4; + p0 = vctp32q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + curExtremValVec = vminnmaq_m(curExtremValVec, vecSrc, p0); + } + /* + * Get min value across the vector + */ + minValue = vminnmavq(minValue, curExtremValVec); + *pResult = minValue; +} + +#else +#if defined(ARM_MATH_LOOPUNROLL) +void arm_absmin_no_idx_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt; /* Loop counter */ \ + \ + \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0.0f) ? out : -out; \ + \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0.0f) ? cur_absmin : -cur_absmin; \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ +} +#else +void arm_absmin_no_idx_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + + /* Load first input value that act as reference value for comparision */ + out = fabsf(*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = fabsf(*pSrc++); + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; +} + +#endif /* defined(ARM_MATH_LOOPUNROLL) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMin group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f64.c new file mode 100644 index 0000000..ac1e926 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_f64.c @@ -0,0 +1,84 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_no_idx_f64.c + * Description: Minimum value of absolute values of a floating-point vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ +void arm_absmin_no_idx_f64( + const float64_t * pSrc, + uint32_t blockSize, + float64_t * pResult) +{ + float64_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + /* Load first input value that act as reference value for comparision */ + out = fabs(*pSrc++); + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = fabs(*pSrc++); + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; +} + +/** + @} end of AbsMin group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q15.c new file mode 100644 index 0000000..76a1788 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q15.c @@ -0,0 +1,222 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_no_idx_q15.c + * Description: Minimum value of absolute values of a Q15 vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_absmin_no_idx_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + uint16_t blkCnt; /* loop counters */ + q15x8_t vecSrc; + q15_t const *pSrcVec; + uint16x8_t curExtremValVec = vdupq_n_s16(Q15_ABSMAX); + q15_t minValue = Q15_ABSMAX; + mve_pred16_t p0; + + + pSrcVec = (q15_t const *) pSrc; + blkCnt = blockSize >> 3; + while (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + /* + * update per-lane min. + */ + curExtremValVec = vminaq(curExtremValVec, vecSrc); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 7; + if (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + p0 = vctp16q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0); + } + /* + * Get min value across the vector + */ + minValue = vminavq(minValue, (q15x8_t)curExtremValVec); + *pResult = minValue; +} + +#else +#if defined(ARM_MATH_DSP) +void arm_absmin_no_idx_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q15_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt; /* Loop counter */ \ + \ + \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q15_t)__QSUB16(0, out); \ + \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ +} +#else +void arm_absmin_no_idx_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q15_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc); + pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMin group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q31.c new file mode 100644 index 0000000..b52db17 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q31.c @@ -0,0 +1,221 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_no_idx_q31.c + * Description: Minimum value of absolute values of a Q31 vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a Q31 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_absmin_no_idx_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + q31x4_t vecSrc; + q31_t const *pSrcVec; + uint32x4_t curExtremValVec = vdupq_n_s32(Q31_ABSMAX); + q31_t minValue = Q31_ABSMAX; + mve_pred16_t p0; + + + pSrcVec = (q31_t const *) pSrc; + blkCnt = blockSize >> 2; + while (blkCnt > 0) + { + vecSrc = vldrwq_s32(pSrcVec); + pSrcVec += 4; + /* + * update per-lane min. + */ + curExtremValVec = vminaq(curExtremValVec, vecSrc); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 3; + if (blkCnt > 0) + { + vecSrc = vldrwq_s32(pSrcVec); + pSrcVec += 4; + p0 = vctp32q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0); + } + /* + * Get min value across the vector + */ + minValue = vminavq(minValue, (q31x4_t)curExtremValVec); + *pResult = minValue; +} + +#else +#if defined(ARM_MATH_DSP) +void arm_absmin_no_idx_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q31_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt; /* Loop counter */ \ + \ + \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q31_t)__QSUB(0, out); \ + \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ +} +#else +void arm_absmin_no_idx_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q31_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc); + pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMin group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q7.c new file mode 100644 index 0000000..f60f113 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_no_idx_q7.c @@ -0,0 +1,223 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_no_idx_q7.c + * Description: Minimum value of absolute values of a Q7 vector + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a Q7 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include <stdint.h> +#include "arm_helium_utils.h" + + + +void arm_absmin_no_idx_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + q7x16_t vecSrc; + q7_t const *pSrcVec; + uint8x16_t curExtremValVec = vdupq_n_s8(Q7_ABSMAX); + q7_t minValue = Q7_ABSMAX; + mve_pred16_t p0; + + + pSrcVec = (q7_t const *) pSrc; + blkCnt = blockSize >> 4; + while (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 16; + /* + * update per-lane min. + */ + curExtremValVec = vminaq(curExtremValVec, vecSrc); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 0xF; + if (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 16; + p0 = vctp8q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0); + } + /* + * Get min value across the vector + */ + minValue = vminavq(minValue, (q7x16_t)curExtremValVec); + *pResult = minValue; +} + +#else +#if defined(ARM_MATH_DSP) +void arm_absmin_no_idx_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + q7_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt; /* Loop counter */ \ + \ + \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ +} +#else +void arm_absmin_no_idx_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + q7_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* Loop counter */ + + + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc); + pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMin group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q15.c new file mode 100644 index 0000000..e4d0704 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q15.c @@ -0,0 +1,269 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_q15.c + * Description: Minimum value of absolute values of a Q15 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_absmin_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + uint16_t blkCnt; /* loop counters */ + q15x8_t vecSrc; + q15_t const *pSrcVec; + q15x8_t curExtremValVec = vdupq_n_s16(Q15_ABSMAX); + q15_t minValue = Q15_ABSMAX; + uint16_t idx = blockSize; + uint16x8_t indexVec; + uint16x8_t curExtremIdxVec; + uint32_t startIdx = 0; + mve_pred16_t p0; + + + indexVec = vidupq_wb_u16(&startIdx, 1); + curExtremIdxVec = vdupq_n_u16(0); + + pSrcVec = (q15_t const *) pSrc; + blkCnt = blockSize >> 3; + while (blkCnt > 0U) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + vecSrc = vabsq(vecSrc); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = vidupq_wb_u16(&startIdx, 1); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 7; + if (blkCnt > 0U) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + vecSrc = vabsq(vecSrc); + + p0 = vctp16q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq_m(vecSrc, curExtremValVec, p0); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + } + /* + * Get min value across the vector + */ + minValue = vminvq(minValue, curExtremValVec); + /* + * set index for lower values to min possible index + */ + p0 = vcmpleq(curExtremValVec, minValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0); + /* + * Get min index which is thus for a min value + */ + idx = vminvq(idx, indexVec); + /* + * Save result + */ + *pIndex = idx; + *pResult = minValue; +} + +#else +#if defined(ARM_MATH_DSP) +void arm_absmin_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + q15_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q15_t)__QSUB16(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q15_t)__QSUB16(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else +void arm_absmin_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + q15_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q15_t) 0x8000) ? 0x7fff : -*pSrc); + pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMin group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q31.c new file mode 100644 index 0000000..21c769b --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q31.c @@ -0,0 +1,269 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_q31.c + * Description: Minimum value of absolute values of a Q31 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a Q31 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_absmin_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + uint16_t blkCnt; /* loop counters */ + q31x4_t vecSrc; + q31_t const *pSrcVec; + q31x4_t curExtremValVec = vdupq_n_s32(Q31_ABSMAX); + q31_t minValue = Q31_ABSMAX; + uint16_t idx = blockSize; + uint32x4_t indexVec; + uint32x4_t curExtremIdxVec; + uint32_t startIdx = 0; + mve_pred16_t p0; + + + indexVec = vidupq_wb_u32(&startIdx, 1); + curExtremIdxVec = vdupq_n_u32(0); + + pSrcVec = (q31_t const *) pSrc; + blkCnt = blockSize >> 2; + while (blkCnt > 0U) + { + vecSrc = vldrwq_s32(pSrcVec); + pSrcVec += 4; + vecSrc = vabsq(vecSrc); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = vidupq_wb_u32(&startIdx, 1); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 3; + if (blkCnt > 0U) + { + vecSrc = vldrwq_s32(pSrcVec); + pSrcVec += 4; + vecSrc = vabsq(vecSrc); + + p0 = vctp32q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq_m(vecSrc, curExtremValVec, p0); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + } + /* + * Get min value across the vector + */ + minValue = vminvq(minValue, curExtremValVec); + /* + * set index for lower values to min possible index + */ + p0 = vcmpleq(curExtremValVec, minValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0); + /* + * Get min index which is thus for a min value + */ + idx = vminvq(idx, indexVec); + /* + * Save result + */ + *pIndex = idx; + *pResult = minValue; +} + +#else +#if defined(ARM_MATH_DSP) +void arm_absmin_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + q31_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q31_t)__QSUB(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q31_t)__QSUB(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else +void arm_absmin_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + q31_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == INT32_MIN) ? INT32_MAX : -*pSrc); + pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMin group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q7.c new file mode 100644 index 0000000..2889891 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_absmin_q7.c @@ -0,0 +1,322 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_absmin_q7.c + * Description: Minimum value of absolute values of a Q7 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup AbsMin + @{ + */ + +/** + @brief Minimum value of absolute values of a Q7 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include <stdint.h> +#include "arm_helium_utils.h" + +#define MAX_BLKSZ_S8 (UINT8_MAX+1) + +static void arm_small_blk_absmin_q7( + const q7_t *pSrc, + uint32_t blockSize, + q7_t *pResult, + uint32_t *pIndex) +{ + uint16_t blkCnt; /* loop counters */ + q7x16_t vecSrc; + q7_t const *pSrcVec; + q7x16_t curExtremValVec = vdupq_n_s8(Q7_ABSMAX); + q7_t minValue = Q7_ABSMAX; + uint16_t idx = blockSize - 1; + uint8x16_t indexVec; + uint8x16_t curExtremIdxVec; + uint32_t startIdx = 0; + mve_pred16_t p0; + + + indexVec = vidupq_wb_u8(&startIdx, 1); + curExtremIdxVec = vdupq_n_u8(0); + + pSrcVec = (q7_t const *) pSrc; + blkCnt = blockSize >> 4; + while (blkCnt > 0U) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 16; + vecSrc = vabsq(vecSrc); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = vidupq_wb_u8(&startIdx, 1); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 0xF; + if (blkCnt > 0U) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 16; + vecSrc = vabsq(vecSrc); + + p0 = vctp8q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq_m(vecSrc, curExtremValVec, p0); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + } + /* + * Get min value across the vector + */ + minValue = vminvq(minValue, curExtremValVec); + /* + * set index for lower values to min possible index + */ + p0 = vcmpleq(curExtremValVec, minValue); + idx = vminvq_p_u8(idx, curExtremIdxVec, p0); + /* + * Save result + */ + *pIndex = idx; + *pResult = minValue; +} + + +void arm_absmin_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + int32_t totalSize = blockSize; + + if (totalSize <= MAX_BLKSZ_S8) + { + arm_small_blk_absmin_q7(pSrc, blockSize, pResult, pIndex); + } + else + { + uint32_t curIdx = 0; + q7_t curBlkExtr = Q7_MAX; + uint32_t curBlkPos = 0; + uint32_t curBlkIdx = 0; + /* + * process blocks of 255 elts + */ + while (totalSize >= MAX_BLKSZ_S8) + { + const q7_t *curSrc = pSrc; + + arm_small_blk_absmin_q7(curSrc, MAX_BLKSZ_S8, pResult, pIndex); + if (*pResult < curBlkExtr) + { + /* + * update partial extrema + */ + curBlkExtr = *pResult; + curBlkPos = *pIndex; + curBlkIdx = curIdx; + } + curIdx++; + pSrc += MAX_BLKSZ_S8; + totalSize -= MAX_BLKSZ_S8; + } + /* + * remainder + */ + arm_small_blk_absmin_q7(pSrc, totalSize, pResult, pIndex); + if (*pResult < curBlkExtr) + { + curBlkExtr = *pResult; + curBlkPos = *pIndex; + curBlkIdx = curIdx; + } + *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos; + *pResult = curBlkExtr; + } +} + +#else +#if defined(ARM_MATH_DSP) +void arm_absmin_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + q7_t cur_absmin, out; /* Temporary variables to store the output value. */\ + uint32_t blkCnt, outIndex; /* Loop counter */ \ + uint32_t index; /* index of maximum value */ \ + \ + /* Initialize index value to zero. */ \ + outIndex = 0U; \ + /* Load first input value that act as reference value for comparision */ \ + out = *pSrc++; \ + out = (out > 0) ? out : (q7_t)__QSUB8(0, out); \ + /* Initialize index of extrema value. */ \ + index = 0U; \ + \ + /* Loop unrolling: Compute 4 outputs at a time */ \ + blkCnt = (blockSize - 1U) >> 2U; \ + \ + while (blkCnt > 0U) \ + { \ + /* Initialize cur_absmin to next consecutive values one by one */ \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + /* compare for the extrema value */ \ + if (cur_absmin < out) \ + { \ + /* Update the extrema value and it's index */ \ + out = cur_absmin; \ + outIndex = index + 1U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 2U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 3U; \ + } \ + \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = index + 4U; \ + } \ + \ + index += 4U; \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Loop unrolling: Compute remaining outputs */ \ + blkCnt = (blockSize - 1U) % 4U; \ + \ + \ + while (blkCnt > 0U) \ + { \ + cur_absmin = *pSrc++; \ + cur_absmin = (cur_absmin > 0) ? cur_absmin : (q7_t)__QSUB8(0, cur_absmin); \ + if (cur_absmin < out) \ + { \ + out = cur_absmin; \ + outIndex = blockSize - blkCnt; \ + } \ + \ + /* Decrement loop counter */ \ + blkCnt--; \ + } \ + \ + /* Store the extrema value and it's index into destination pointers */ \ + *pResult = out; \ + *pIndex = outIndex; +} +#else +void arm_absmin_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + q7_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc); + pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = (*pSrc > 0) ? *pSrc : ((*pSrc == (q7_t) 0x80) ? (q7_t) 0x7f : -*pSrc); + pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_DSP) */ +#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of AbsMin group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c new file mode 100644 index 0000000..9611fd7 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f16.c @@ -0,0 +1,140 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f16.c + * Description: LogSumExp + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include <limits.h> +#include <math.h> + +/** + @ingroup groupStats + */ + +/** + @defgroup Entropy Entropy + + Computes the entropy of a distribution + + */ + +/** + * @addtogroup Entropy + * @{ + */ + + +/** + * @brief Entropy + * + * @param[in] pSrcA Array of input values. + * @param[in] blockSize Number of samples in the input array. + * @return Entropy -Sum(p ln p) + * + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +#include "arm_vec_math_f16.h" + +float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize) +{ + uint32_t blkCnt; + _Float16 accum=0.0f16,p; + + + blkCnt = blockSize; + + f16x8_t vSum = vdupq_n_f16(0.0f); + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 3U; + + while (blkCnt > 0U) + { + f16x8_t vecIn = vld1q(pSrcA); + + vSum = vaddq_f16(vSum, vmulq(vecIn, vlogq_f16(vecIn))); + + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pSrcA += 8; + blkCnt --; + } + + accum = vecAddAcrossF16Mve(vSum); + + /* Tail */ + blkCnt = blockSize & 0x7; + while(blkCnt > 0) + { + p = *pSrcA++; + accum += p * (_Float16)logf((float32_t)p); + + blkCnt--; + + } + + return (-accum); +} + +#else + +float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize) +{ + const float16_t *pIn; + uint32_t blkCnt; + _Float16 accum, p; + + pIn = pSrcA; + blkCnt = blockSize; + + accum = 0.0f; + + while(blkCnt > 0) + { + p = *pIn++; + accum += p * (_Float16)logf((float32_t)p); + + blkCnt--; + + } + + return(-accum); +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of Entropy group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f32.c new file mode 100644 index 0000000..a2160c1 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f32.c @@ -0,0 +1,174 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f32.c + * Description: LogSumExp + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#include <limits.h> +#include <math.h> + + +/** + * @addtogroup Entropy + * @{ + */ + + +/** + * @brief Entropy + * + * @param[in] pSrcA Array of input values. + * @param[in] blockSize Number of samples in the input array. + * @return Entropy -Sum(p ln p) + * + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +#include "arm_vec_math.h" + +float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize) +{ + uint32_t blkCnt; + float32_t accum=0.0f,p; + + + blkCnt = blockSize; + + f32x4_t vSum = vdupq_n_f32(0.0f); + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + f32x4_t vecIn = vld1q(pSrcA); + + vSum = vaddq_f32(vSum, vmulq(vecIn, vlogq_f32(vecIn))); + + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pSrcA += 4; + blkCnt --; + } + + accum = vecAddAcrossF32Mve(vSum); + + /* Tail */ + blkCnt = blockSize & 0x3; + while(blkCnt > 0) + { + p = *pSrcA++; + accum += p * logf(p); + + blkCnt--; + + } + + return (-accum); +} + +#else +#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "NEMath.h" + +float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize) +{ + const float32_t *pIn; + uint32_t blkCnt; + float32_t accum, p; + + float32x4_t accumV; + float32x2_t accumV2; + float32x4_t tmpV, tmpV2; + + pIn = pSrcA; + + accum = 0.0f; + accumV = vdupq_n_f32(0.0f); + + blkCnt = blockSize >> 2; + while(blkCnt > 0) + { + tmpV = vld1q_f32(pIn); + pIn += 4; + + tmpV2 = vlogq_f32(tmpV); + accumV = vmlaq_f32(accumV, tmpV, tmpV2); + + blkCnt--; + + } + + accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV)); + accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1); + + + blkCnt = blockSize & 3; + while(blkCnt > 0) + { + p = *pIn++; + accum += p * logf(p); + + blkCnt--; + + } + + return(-accum); +} + +#else +float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize) +{ + const float32_t *pIn; + uint32_t blkCnt; + float32_t accum, p; + + pIn = pSrcA; + blkCnt = blockSize; + + accum = 0.0f; + + while(blkCnt > 0) + { + p = *pIn++; + accum += p * logf(p); + + blkCnt--; + + } + + return(-accum); +} +#endif +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of Entropy group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c new file mode 100644 index 0000000..d671791 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_entropy_f64.c @@ -0,0 +1,73 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f64.c + * Description: LogSumExp + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#include <limits.h> +#include <math.h> + +/** + * @addtogroup Entropy + * @{ + */ + +/** + * @brief Entropy + * + * @param[in] pSrcA Array of input values. + * @param[in] blockSize Number of samples in the input array. + * @return Entropy -Sum(p ln p) + * + */ + +float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize) +{ + const float64_t *pIn; + uint32_t blkCnt; + float64_t accum, p; + + pIn = pSrcA; + blkCnt = blockSize; + + accum = 0.0; + + while(blkCnt > 0) + { + p = *pIn++; + + accum += p * log(p); + + blkCnt--; + + } + + return(-accum); +} + +/** + * @} end of Entropy group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c new file mode 100644 index 0000000..98ac097 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f16.c @@ -0,0 +1,152 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f16.c + * Description: LogSumExp + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include <limits.h> +#include <math.h> + +/** + @ingroup groupStats + */ + +/** + @defgroup Kullback-Leibler Kullback-Leibler divergence + + Computes the Kullback-Leibler divergence between two distributions + + */ + + +/** + * @addtogroup Kullback-Leibler + * @{ + */ + + +/** + * @brief Kullback-Leibler + * + * Distribution A may contain 0 with Neon version. + * Result will be right but some exception flags will be set. + * + * Distribution B must not contain 0 probability. + * + * @param[in] *pSrcA points to an array of input values for probaility distribution A. + * @param[in] *pSrcB points to an array of input values for probaility distribution B. + * @param[in] blockSize number of samples in the input array. + * @return Kullback-Leibler divergence D(A || B) + * + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +#include "arm_vec_math_f16.h" + +float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize) +{ + uint32_t blkCnt; + _Float16 accum, pA,pB; + + + blkCnt = blockSize; + + accum = 0.0f16; + + f16x8_t vSum = vdupq_n_f16(0.0f16); + blkCnt = blockSize >> 3; + while(blkCnt > 0) + { + f16x8_t vecA = vld1q(pSrcA); + f16x8_t vecB = vld1q(pSrcB); + f16x8_t vRatio; + + vRatio = vdiv_f16(vecB, vecA); + vSum = vaddq_f16(vSum, vmulq(vecA, vlogq_f16(vRatio))); + + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pSrcA += 8; + pSrcB += 8; + blkCnt --; + } + + accum = vecAddAcrossF16Mve(vSum); + + blkCnt = blockSize & 7; + while(blkCnt > 0) + { + pA = *pSrcA++; + pB = *pSrcB++; + accum += pA * (_Float16)logf((float32_t)pB / (float32_t)pA); + + blkCnt--; + + } + + return(-accum); +} + +#else +float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize) +{ + const float16_t *pInA, *pInB; + uint32_t blkCnt; + _Float16 accum, pA,pB; + + pInA = pSrcA; + pInB = pSrcB; + blkCnt = blockSize; + + accum = 0.0f; + + while(blkCnt > 0) + { + pA = *pInA++; + pB = *pInB++; + accum += pA * (_Float16)logf((float32_t)pB / (float32_t)pA); + + blkCnt--; + + } + + return(-accum); +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of Kullback-Leibler group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f32.c new file mode 100644 index 0000000..45a9624 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f32.c @@ -0,0 +1,193 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f32.c + * Description: LogSumExp + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#include <limits.h> +#include <math.h> + + +/** + * @addtogroup Kullback-Leibler + * @{ + */ + + +/** + * @brief Kullback-Leibler + * + * Distribution A may contain 0 with Neon version. + * Result will be right but some exception flags will be set. + * + * Distribution B must not contain 0 probability. + * + * @param[in] *pSrcA points to an array of input values for probaility distribution A. + * @param[in] *pSrcB points to an array of input values for probaility distribution B. + * @param[in] blockSize number of samples in the input array. + * @return Kullback-Leibler divergence D(A || B) + * + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +#include "arm_vec_math.h" + +float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSrcB,uint32_t blockSize) +{ + uint32_t blkCnt; + float32_t accum, pA,pB; + + + blkCnt = blockSize; + + accum = 0.0f; + + f32x4_t vSum = vdupq_n_f32(0.0f); + blkCnt = blockSize >> 2; + while(blkCnt > 0) + { + f32x4_t vecA = vld1q(pSrcA); + f32x4_t vecB = vld1q(pSrcB); + f32x4_t vRatio; + + vRatio = vdiv_f32(vecB, vecA); + vSum = vaddq_f32(vSum, vmulq(vecA, vlogq_f32(vRatio))); + + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pSrcA += 4; + pSrcB += 4; + blkCnt --; + } + + accum = vecAddAcrossF32Mve(vSum); + + blkCnt = blockSize & 3; + while(blkCnt > 0) + { + pA = *pSrcA++; + pB = *pSrcB++; + accum += pA * logf(pB / pA); + + blkCnt--; + + } + + return(-accum); +} + +#else +#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "NEMath.h" + +float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSrcB,uint32_t blockSize) +{ + const float32_t *pInA, *pInB; + uint32_t blkCnt; + float32_t accum, pA,pB; + + float32x4_t accumV; + float32x2_t accumV2; + float32x4_t tmpVA, tmpVB,tmpV; + + pInA = pSrcA; + pInB = pSrcB; + + accum = 0.0f; + accumV = vdupq_n_f32(0.0f); + + blkCnt = blockSize >> 2; + while(blkCnt > 0) + { + tmpVA = vld1q_f32(pInA); + pInA += 4; + + tmpVB = vld1q_f32(pInB); + pInB += 4; + + tmpV = vinvq_f32(tmpVA); + tmpVB = vmulq_f32(tmpVB, tmpV); + tmpVB = vlogq_f32(tmpVB); + + accumV = vmlaq_f32(accumV, tmpVA, tmpVB); + + blkCnt--; + + } + + accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV)); + accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1); + + blkCnt = blockSize & 3; + while(blkCnt > 0) + { + pA = *pInA++; + pB = *pInB++; + accum += pA * logf(pB/pA); + + blkCnt--; + + } + + return(-accum); +} + +#else +float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSrcB,uint32_t blockSize) +{ + const float32_t *pInA, *pInB; + uint32_t blkCnt; + float32_t accum, pA,pB; + + pInA = pSrcA; + pInB = pSrcB; + blkCnt = blockSize; + + accum = 0.0f; + + while(blkCnt > 0) + { + pA = *pInA++; + pB = *pInB++; + accum += pA * logf(pB / pA); + + blkCnt--; + + } + + return(-accum); +} +#endif +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of Kullback-Leibler group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c new file mode 100644 index 0000000..b43b218 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_kullback_leibler_f64.c @@ -0,0 +1,75 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f64.c + * Description: LogSumExp + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#include <limits.h> +#include <math.h> + +/** + * @addtogroup Kullback-Leibler + * @{ + */ + +/** + * @brief Kullback-Leibler + * + * @param[in] *pSrcA points to an array of input values for probaility distribution A. + * @param[in] *pSrcB points to an array of input values for probaility distribution B. + * @param[in] blockSize number of samples in the input array. + * @return Kullback-Leibler divergence D(A || B) + * + */ + +float64_t arm_kullback_leibler_f64(const float64_t * pSrcA, const float64_t * pSrcB, uint32_t blockSize) +{ + const float64_t *pInA, *pInB; + uint32_t blkCnt; + float64_t accum, pA,pB; + + pInA = pSrcA; + pInB = pSrcB; + blkCnt = blockSize; + + accum = 0.0; + + while(blkCnt > 0) + { + pA = *pInA++; + pB = *pInB++; + + accum += pA * log(pB / pA); + + blkCnt--; + } + + return(-accum); +} + +/** + * @} end of Kullback-Leibler group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c new file mode 100644 index 0000000..a35ac0e --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f16.c @@ -0,0 +1,84 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f16.c + * Description: LogSumExp + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include <limits.h> +#include <math.h> + +/** + @ingroup groupStats + */ + +/** + @defgroup LogSumExp LogSumExp + + LogSumExp optimizations to compute sum of probabilities with Gaussian distributions + + */ + +/** + * @addtogroup LogSumExp + * @{ + */ + + +/** + * @brief Dot product with log arithmetic + * + * Vectors are containing the log of the samples + * + * @param[in] *pSrcA points to the first input vector + * @param[in] *pSrcB points to the second input vector + * @param[in] blockSize number of samples in each vector + * @param[in] *pTmpBuffer temporary buffer of length blockSize + * @return The log of the dot product. + * + */ + + +float16_t arm_logsumexp_dot_prod_f16(const float16_t * pSrcA, + const float16_t * pSrcB, + uint32_t blockSize, + float16_t *pTmpBuffer) +{ + float16_t result; + arm_add_f16((float16_t*)pSrcA, (float16_t*)pSrcB, pTmpBuffer, blockSize); + + result = arm_logsumexp_f16(pTmpBuffer, blockSize); + return(result); +} + +/** + * @} end of LogSumExp group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c new file mode 100644 index 0000000..d2a94da --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_dot_prod_f32.c @@ -0,0 +1,68 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f32.c + * Description: LogSumExp + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#include <limits.h> +#include <math.h> + + +/** + * @addtogroup LogSumExp + * @{ + */ + + +/** + * @brief Dot product with log arithmetic + * + * Vectors are containing the log of the samples + * + * @param[in] *pSrcA points to the first input vector + * @param[in] *pSrcB points to the second input vector + * @param[in] blockSize number of samples in each vector + * @param[in] *pTmpBuffer temporary buffer of length blockSize + * @return The log of the dot product. + * + */ + + +float32_t arm_logsumexp_dot_prod_f32(const float32_t * pSrcA, + const float32_t * pSrcB, + uint32_t blockSize, + float32_t *pTmpBuffer) +{ + float32_t result; + arm_add_f32((float32_t*)pSrcA, (float32_t*)pSrcB, pTmpBuffer, blockSize); + + result = arm_logsumexp_f32(pTmpBuffer, blockSize); + return(result); +} + +/** + * @} end of LogSumExp group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c new file mode 100644 index 0000000..84d4843 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f16.c @@ -0,0 +1,172 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f16.c + * Description: LogSumExp + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include <limits.h> +#include <math.h> + + +/** + * @addtogroup LogSumExp + * @{ + */ + + +/** + * @brief Computation of the LogSumExp + * + * In probabilistic computations, the dynamic of the probability values can be very + * wide because they come from gaussian functions. + * To avoid underflow and overflow issues, the values are represented by their log. + * In this representation, multiplying the original exp values is easy : their logs are added. + * But adding the original exp values is requiring some special handling and it is the + * goal of the LogSumExp function. + * + * If the values are x1...xn, the function is computing: + * + * ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that + * rounding issues are minimised. + * + * The max xm of the values is extracted and the function is computing: + * xm + ln(exp(x1 - xm) + ... + exp(xn - xm)) + * + * @param[in] *in Pointer to an array of input values. + * @param[in] blockSize Number of samples in the input array. + * @return LogSumExp + * + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +#include "arm_vec_math_f16.h" + +float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize) +{ + float16_t maxVal; + const float16_t *pIn; + int32_t blkCnt; + _Float16 accum=0.0f16; + _Float16 tmp; + + + arm_max_no_idx_f16((float16_t *) in, blockSize, &maxVal); + + + blkCnt = blockSize; + pIn = in; + + + f16x8_t vSum = vdupq_n_f16(0.0f16); + blkCnt = blockSize >> 3; + while(blkCnt > 0) + { + f16x8_t vecIn = vld1q(pIn); + f16x8_t vecExp; + + vecExp = vexpq_f16(vsubq_n_f16(vecIn, maxVal)); + + vSum = vaddq_f16(vSum, vecExp); + + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pIn += 8; + blkCnt --; + } + + /* sum + log */ + accum = vecAddAcrossF16Mve(vSum); + + blkCnt = blockSize & 0x7; + while(blkCnt > 0) + { + tmp = *pIn++; + accum += (_Float16)expf((float32_t)((_Float16)tmp - (_Float16)maxVal)); + blkCnt--; + + } + + accum = (_Float16)maxVal + (_Float16)logf((float32_t)accum); + + return (accum); +} + +#else +float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize) +{ + _Float16 maxVal; + _Float16 tmp; + const float16_t *pIn; + uint32_t blkCnt; + _Float16 accum; + + pIn = in; + blkCnt = blockSize; + + maxVal = *pIn++; + blkCnt--; + + while(blkCnt > 0) + { + tmp = *pIn++; + + if (tmp > maxVal) + { + maxVal = tmp; + } + blkCnt--; + + } + + blkCnt = blockSize; + pIn = in; + accum = 0; + while(blkCnt > 0) + { + tmp = *pIn++; + accum += (_Float16)expf((float32_t)((_Float16)tmp - (_Float16)maxVal)); + blkCnt--; + + } + accum = (_Float16)maxVal + (_Float16)logf((float32_t)accum); + + return(accum); +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of LogSumExp group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c new file mode 100644 index 0000000..72a5d3a --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_logsumexp_f32.c @@ -0,0 +1,277 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_logsumexp_f32.c + * Description: LogSumExp + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#include <limits.h> +#include <math.h> + + +/** + * @addtogroup LogSumExp + * @{ + */ + + +/** + * @brief Computation of the LogSumExp + * + * In probabilistic computations, the dynamic of the probability values can be very + * wide because they come from gaussian functions. + * To avoid underflow and overflow issues, the values are represented by their log. + * In this representation, multiplying the original exp values is easy : their logs are added. + * But adding the original exp values is requiring some special handling and it is the + * goal of the LogSumExp function. + * + * If the values are x1...xn, the function is computing: + * + * ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that + * rounding issues are minimised. + * + * The max xm of the values is extracted and the function is computing: + * xm + ln(exp(x1 - xm) + ... + exp(xn - xm)) + * + * @param[in] *in Pointer to an array of input values. + * @param[in] blockSize Number of samples in the input array. + * @return LogSumExp + * + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +#include "arm_vec_math.h" + +float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize) +{ + float32_t maxVal; + const float32_t *pIn; + int32_t blkCnt; + float32_t accum=0.0f; + float32_t tmp; + + + arm_max_no_idx_f32((float32_t *) in, blockSize, &maxVal); + + + blkCnt = blockSize; + pIn = in; + + + f32x4_t vSum = vdupq_n_f32(0.0f); + blkCnt = blockSize >> 2; + while(blkCnt > 0) + { + f32x4_t vecIn = vld1q(pIn); + f32x4_t vecExp; + + vecExp = vexpq_f32(vsubq_n_f32(vecIn, maxVal)); + + vSum = vaddq_f32(vSum, vecExp); + + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pIn += 4; + blkCnt --; + } + + /* sum + log */ + accum = vecAddAcrossF32Mve(vSum); + + blkCnt = blockSize & 0x3; + while(blkCnt > 0) + { + tmp = *pIn++; + accum += expf(tmp - maxVal); + blkCnt--; + + } + + accum = maxVal + logf(accum); + + return (accum); +} + +#else +#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "NEMath.h" +float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize) +{ + float32_t maxVal; + float32_t tmp; + float32x4_t tmpV, tmpVb; + float32x4_t maxValV; + uint32x4_t idxV; + float32x4_t accumV; + float32x2_t accumV2; + + const float32_t *pIn; + uint32_t blkCnt; + float32_t accum; + + pIn = in; + + blkCnt = blockSize; + + if (blockSize <= 3) + { + maxVal = *pIn++; + blkCnt--; + + while(blkCnt > 0) + { + tmp = *pIn++; + + if (tmp > maxVal) + { + maxVal = tmp; + } + blkCnt--; + } + } + else + { + maxValV = vld1q_f32(pIn); + pIn += 4; + blkCnt = (blockSize - 4) >> 2; + + while(blkCnt > 0) + { + tmpVb = vld1q_f32(pIn); + pIn += 4; + + idxV = vcgtq_f32(tmpVb, maxValV); + maxValV = vbslq_f32(idxV, tmpVb, maxValV ); + + blkCnt--; + } + + accumV2 = vpmax_f32(vget_low_f32(maxValV),vget_high_f32(maxValV)); + accumV2 = vpmax_f32(accumV2,accumV2); + maxVal = vget_lane_f32(accumV2, 0) ; + + blkCnt = (blockSize - 4) & 3; + + while(blkCnt > 0) + { + tmp = *pIn++; + + if (tmp > maxVal) + { + maxVal = tmp; + } + blkCnt--; + } + + } + + + + maxValV = vdupq_n_f32(maxVal); + pIn = in; + accum = 0; + accumV = vdupq_n_f32(0.0f); + + blkCnt = blockSize >> 2; + + while(blkCnt > 0) + { + tmpV = vld1q_f32(pIn); + pIn += 4; + tmpV = vsubq_f32(tmpV, maxValV); + tmpV = vexpq_f32(tmpV); + accumV = vaddq_f32(accumV, tmpV); + + blkCnt--; + + } + accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV)); + accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1); + + blkCnt = blockSize & 0x3; + while(blkCnt > 0) + { + tmp = *pIn++; + accum += expf(tmp - maxVal); + blkCnt--; + + } + + accum = maxVal + logf(accum); + + return(accum); +} +#else +float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize) +{ + float32_t maxVal; + float32_t tmp; + const float32_t *pIn; + uint32_t blkCnt; + float32_t accum; + + pIn = in; + blkCnt = blockSize; + + maxVal = *pIn++; + blkCnt--; + + while(blkCnt > 0) + { + tmp = *pIn++; + + if (tmp > maxVal) + { + maxVal = tmp; + } + blkCnt--; + + } + + blkCnt = blockSize; + pIn = in; + accum = 0; + while(blkCnt > 0) + { + tmp = *pIn++; + accum += expf(tmp - maxVal); + blkCnt--; + + } + accum = maxVal + logf(accum); + + return(accum); +} +#endif +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of LogSumExp group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c new file mode 100644 index 0000000..b5017f1 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f16.c @@ -0,0 +1,246 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_f16.c + * Description: Maximum value of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_max_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + int32_t blkCnt; + f16x8_t vecSrc; + f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN); + float16_t maxValue = F16_MIN; + uint32_t idx = blockSize; + uint16x8_t indexVec; + uint16x8_t curExtremIdxVec; + uint32_t curIdx = 0; + mve_pred16_t p0; + float16_t tmp; + + + indexVec = vidupq_wb_u16(&curIdx, 1); + curExtremIdxVec = vdupq_n_u16(0); + + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 3; + while (blkCnt > 0) + { + vecSrc = vldrhq_f16(pSrc); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = vidupq_wb_u16(&curIdx, 1); + + pSrc += 8; + /* Decrement the loop counter */ + blkCnt--; + } + + + /* + * Get max value across the vector + */ + maxValue = vmaxnmvq(maxValue, curExtremValVec); + /* + * set index for lower values to max possible index + */ + p0 = vcmpgeq(curExtremValVec, maxValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0); + /* + * Get min index which is thus for a max value + */ + idx = vminvq(idx, indexVec); + + /* Tail */ + blkCnt = blockSize & 7; + + while (blkCnt > 0) + { + /* Initialize tmp to the next consecutive values one by one */ + tmp = *pSrc++; + + /* compare for the maximum value */ + if ((_Float16)maxValue < (_Float16)tmp) + { + /* Update the maximum value and it's index */ + maxValue = tmp; + idx = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* + * Save result + */ + *pIndex = idx; + *pResult = maxValue; +} + +#else +void arm_max_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + float16_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + uint32_t index; /* index of maximum value */ +#endif + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + /* Initialise index of maximum value. */ + index = 0U; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if ((_Float16)out < (_Float16)maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = index + 1U; + } + + maxVal = *pSrc++; + if ((_Float16)out < (_Float16)maxVal) + { + out = maxVal; + outIndex = index + 2U; + } + + maxVal = *pSrc++; + if ((_Float16)out < (_Float16)maxVal) + { + out = maxVal; + outIndex = index + 3U; + } + + maxVal = *pSrc++; + if ((_Float16)out < (_Float16)maxVal) + { + out = maxVal; + outIndex = index + 4U; + } + + index += 4U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize - 1U) % 4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if ((_Float16)out < (_Float16)maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of Max group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c new file mode 100644 index 0000000..d82b039 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f32.c @@ -0,0 +1,365 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_f32.c + * Description: Maximum value of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + +/** + @ingroup groupStats + */ + +/** + @defgroup Max Maximum + + Computes the maximum value of an array of data. + The function returns both the maximum value and its position within the array. + There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_max_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + uint32_t blkCnt; + f32x4_t vecSrc; + f32x4_t curExtremValVec = vdupq_n_f32(F32_MIN); + float32_t maxValue = F32_MIN; + uint32_t idx = blockSize; + uint32x4_t indexVec; + uint32x4_t curExtremIdxVec; + uint32_t curIdx = 0; + mve_pred16_t p0; + float32_t tmp; + + + indexVec = vidupq_wb_u32(&curIdx, 1); + curExtremIdxVec = vdupq_n_u32(0); + + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + while (blkCnt > 0U) + { + vecSrc = vldrwq_f32(pSrc); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = vidupq_wb_u32(&curIdx, 1); + + pSrc += 4; + /* Decrement the loop counter */ + blkCnt--; + } + + + /* + * Get max value across the vector + */ + maxValue = vmaxnmvq(maxValue, curExtremValVec); + /* + * set index for lower values to max possible index + */ + p0 = vcmpgeq(curExtremValVec, maxValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0); + /* + * Get min index which is thus for a max value + */ + idx = vminvq(idx, indexVec); + + /* Tail */ + blkCnt = blockSize & 0x3; + + while (blkCnt > 0U) + { + /* Initialize tmp to the next consecutive values one by one */ + tmp = *pSrc++; + + /* compare for the maximum value */ + if (maxValue < tmp) + { + /* Update the maximum value and it's index */ + maxValue = tmp; + idx = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* + * Save result + */ + *pIndex = idx; + *pResult = maxValue; +} + +#else +#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_max_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + float32_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + float32x4_t outV, srcV; + float32x2_t outV2; + + uint32x4_t idxV; + uint32x4_t maxIdx; + static const uint32_t indexInit[4]={4,5,6,7}; + static const uint32_t countVInit[4]={0,1,2,3}; + + uint32x4_t index; + uint32x4_t delta; + uint32x4_t countV; + uint32x2_t countV2; + + maxIdx = vdupq_n_u32(ULONG_MAX); + delta = vdupq_n_u32(4); + index = vld1q_u32(indexInit); + countV = vld1q_u32(countVInit); + + + /* Initialise the index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparison */ + if (blockSize <= 3) + { + out = *pSrc++; + + blkCnt = blockSize - 1; + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + } + } + else + { + outV = vld1q_f32(pSrc); + pSrc += 4; + + /* Compute 4 outputs at a time */ + blkCnt = (blockSize - 4 ) >> 2U; + + while (blkCnt > 0U) + { + srcV = vld1q_f32(pSrc); + pSrc += 4; + + idxV = vcgtq_f32(srcV, outV); + outV = vbslq_f32(idxV, srcV, outV ); + countV = vbslq_u32(idxV, index,countV ); + + index = vaddq_u32(index,delta); + + /* Decrement the loop counter */ + blkCnt--; + } + + outV2 = vpmax_f32(vget_low_f32(outV),vget_high_f32(outV)); + outV2 = vpmax_f32(outV2,outV2); + out = vget_lane_f32(outV2, 0); + + idxV = vceqq_f32(outV, vdupq_n_f32(out)); + countV = vbslq_u32(idxV, countV,maxIdx); + + countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV)); + countV2 = vpmin_u32(countV2,countV2); + outIndex = vget_lane_u32(countV2,0); + + /* if (blockSize - 1U) is not multiple of 4 */ + blkCnt = (blockSize - 4 ) % 4U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt ; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#else +void arm_max_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + float32_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + uint32_t index; /* index of maximum value */ +#endif + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + /* Initialise index of maximum value. */ + index = 0U; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = index + 1U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 2U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 3U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 4U; + } + + index += 4U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize - 1U) % 4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* #if defined(ARM_MATH_NEON) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of Max group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f64.c new file mode 100644 index 0000000..8361219 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_f64.c @@ -0,0 +1,90 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_f64.c + * Description: Maximum value of a floating-point vector + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ +void arm_max_f64( + const float64_t * pSrc, + uint32_t blockSize, + float64_t * pResult, + uint32_t * pIndex) +{ + float64_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + @} end of Max group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c new file mode 100644 index 0000000..f989f60 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f16.c @@ -0,0 +1,144 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_no_idx_f16.c + * Description: Maximum value of a floating-point vector without returning the index + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_max_no_idx_f16( + const float16_t *pSrc, + uint32_t blockSize, + float16_t *pResult) +{ + f16x8_t vecSrc; + f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN); + float16_t maxValue = F16_MIN; + float16_t newVal; + uint32_t blkCnt; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 3U; + + while (blkCnt > 0U) + { + + vecSrc = vldrhq_f16(pSrc); + /* + * update per-lane max. + */ + curExtremValVec = vmaxnmq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pSrc += 8; + blkCnt --; + } + /* + * Get max value across the vector + */ + maxValue = vmaxnmvq(maxValue, curExtremValVec); + + blkCnt = blockSize & 7; + + while (blkCnt > 0U) + { + newVal = *pSrc++; + + /* compare for the maximum value */ + if ((_Float16)maxValue < (_Float16)newVal) + { + /* Update the maximum value and it's index */ + maxValue = newVal; + } + + blkCnt --; + } + + *pResult = maxValue; +} + +#else + +void arm_max_no_idx_f16( + const float16_t *pSrc, + uint32_t blockSize, + float16_t *pResult) +{ + float16_t maxValue = F16_MIN; + float16_t newVal; + + while (blockSize > 0U) + { + newVal = *pSrc++; + + /* compare for the maximum value */ + if ((_Float16)maxValue < (_Float16)newVal) + { + /* Update the maximum value and it's index */ + maxValue = newVal; + } + + blockSize --; + } + + *pResult = maxValue; +} + +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of Max group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f32.c new file mode 100644 index 0000000..9a2a015 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f32.c @@ -0,0 +1,138 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_no_idx_f32.c + * Description: Maximum value of a floating-point vector without returning the index + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_max_no_idx_f32( + const float32_t *pSrc, + uint32_t blockSize, + float32_t *pResult) +{ + f32x4_t vecSrc; + f32x4_t curExtremValVec = vdupq_n_f32(F32_MIN); + float32_t maxValue = F32_MIN; + float32_t newVal; + uint32_t blkCnt; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + + vecSrc = vldrwq_f32(pSrc); + /* + * update per-lane max. + */ + curExtremValVec = vmaxnmq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pSrc += 4; + blkCnt --; + } + /* + * Get max value across the vector + */ + maxValue = vmaxnmvq(maxValue, curExtremValVec); + + blkCnt = blockSize & 3; + + while (blkCnt > 0U) + { + newVal = *pSrc++; + + /* compare for the maximum value */ + if (maxValue < newVal) + { + /* Update the maximum value and it's index */ + maxValue = newVal; + } + + blkCnt --; + } + + *pResult = maxValue; +} + +#else + +void arm_max_no_idx_f32( + const float32_t *pSrc, + uint32_t blockSize, + float32_t *pResult) +{ + float32_t maxValue = F32_MIN; + float32_t newVal; + + while (blockSize > 0U) + { + newVal = *pSrc++; + + /* compare for the maximum value */ + if (maxValue < newVal) + { + /* Update the maximum value and it's index */ + maxValue = newVal; + } + + blockSize --; + } + + *pResult = maxValue; +} + +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of Max group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f64.c new file mode 100644 index 0000000..7eca31c --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_f64.c @@ -0,0 +1,75 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_no_idx_f64.c + * Description: Maximum value of a floating-point vector without returning the index + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ +void arm_max_no_idx_f64( + const float64_t *pSrc, + uint32_t blockSize, + float64_t *pResult) +{ + float64_t maxValue = F64_MIN; + float64_t newVal; + + while (blockSize > 0U) + { + newVal = *pSrc++; + + /* compare for the maximum value */ + if (maxValue < newVal) + { + /* Update the maximum value and it's index */ + maxValue = newVal; + } + + blockSize --; + } + + *pResult = maxValue; +} + +/** + @} end of Max group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q15.c new file mode 100644 index 0000000..2adea1a --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q15.c @@ -0,0 +1,142 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_no_idx_q15.c + * Description: Maximum value of a q15 vector without returning the index + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + + +/** + @ingroup groupStats + */ + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a q15 vector without index. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_max_no_idx_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + q15x8_t vecSrc; + q15_t const *pSrcVec; + q15x8_t curExtremValVec = vdupq_n_s16(Q15_MIN); + q15_t maxValue = Q15_MIN; + mve_pred16_t p0; + + + pSrcVec = (q15_t const *) pSrc; + blkCnt = blockSize >> 3; + while (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + /* + * update per-lane max. + */ + curExtremValVec = vmaxq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 7; + if (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + p0 = vctp16q(blkCnt); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0); + } + /* + * Get max value across the vector + */ + maxValue = vmaxvq(maxValue, curExtremValVec); + *pResult = maxValue; +} + +#else +void arm_max_no_idx_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q15_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* loop counter */ + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value */ + out = maxVal1; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the maximum value into destination pointer */ + *pResult = out; +} + +#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of Max group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q31.c new file mode 100644 index 0000000..9c7a425 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q31.c @@ -0,0 +1,142 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_no_idx_q31.c + * Description: Maximum value of a q31 vector without returning the index + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + + +/** + @ingroup groupStats + */ + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a q31 vector without index. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_max_no_idx_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + q31x4_t vecSrc; + q31_t const *pSrcVec; + q31x4_t curExtremValVec = vdupq_n_s32(Q31_MIN); + q31_t maxValue = Q31_MIN; + mve_pred16_t p0; + + + pSrcVec = (q31_t const *) pSrc; + blkCnt = blockSize >> 2; + while (blkCnt > 0) + { + vecSrc = vldrwq_s32(pSrcVec); + pSrcVec += 4; + /* + * update per-lane max. + */ + curExtremValVec = vmaxq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 3; + if (blkCnt > 0) + { + vecSrc = vldrwq_s32(pSrcVec); + pSrcVec += 4; + p0 = vctp32q(blkCnt); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0); + } + /* + * Get max value across the vector + */ + maxValue = vmaxvq(maxValue, curExtremValVec); + *pResult = maxValue; +} + +#else +void arm_max_no_idx_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q31_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* loop counter */ + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value */ + out = maxVal1; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the maximum value into destination pointer */ + *pResult = out; +} + +#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of Max group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q7.c new file mode 100644 index 0000000..a6a61b7 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_no_idx_q7.c @@ -0,0 +1,143 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_no_idx_q7.c + * Description: Maximum value of a q7 vector without returning the index + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + + +/** + @ingroup groupStats + */ + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a q7 vector without index. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_max_no_idx_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + q7x16_t vecSrc; + q7_t const *pSrcVec; + q7x16_t curExtremValVec = vdupq_n_s8(Q7_MIN); + q7_t maxValue = Q7_MIN; + mve_pred16_t p0; + + + pSrcVec = (q7_t const *) pSrc; + blkCnt = blockSize >> 4; + while (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 16; + /* + * update per-lane max. + */ + curExtremValVec = vmaxq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 0xF; + if (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 16; + p0 = vctp8q(blkCnt); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0); + } + /* + * Get max value across the vector + */ + maxValue = vmaxvq(maxValue, curExtremValVec); + *pResult = maxValue; +} + +#else + +void arm_max_no_idx_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + q7_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* loop counter */ + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value */ + out = maxVal1; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the maximum value into destination pointer */ + *pResult = out; +} + +#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of Max group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c new file mode 100644 index 0000000..5715e37 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q15.c @@ -0,0 +1,201 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_q15.c + * Description: Maximum value of a Q15 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_max_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + int32_t blkCnt; /* loop counters */ + q15x8_t extremValVec = vdupq_n_s16(Q15_MIN); + q15_t maxValue = Q15_MIN; + uint16x8_t indexVec; + uint16x8_t extremIdxVec; + mve_pred16_t p0; + uint16_t extremIdxArr[8]; + + indexVec = vidupq_u16(0U, 1); + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp16q(blkCnt); + q15x8_t extremIdxVal = vld1q_z_s16(pSrc, p); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); + + extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); + /* store per-lane extrema indexes */ + vst1q_p_u16(extremIdxArr, indexVec, p0); + + indexVec += 8; + pSrc += 8; + blkCnt -= 8; + } + while (blkCnt > 0); + + + /* Get max value across the vector */ + maxValue = vmaxvq(maxValue, extremValVec); + + /* set index for lower values to max possible index */ + p0 = vcmpgeq(extremValVec, maxValue); + extremIdxVec = vld1q_u16(extremIdxArr); + + indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0); + *pIndex = vminvq(blockSize - 1, indexVec); + *pResult = maxValue; +} + +#else +void arm_max_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + q15_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + +#if defined (ARM_MATH_LOOPUNROLL) + uint32_t index; /* index of maximum value */ +#endif + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#if defined (ARM_MATH_LOOPUNROLL) + /* Initialise index of maximum value. */ + index = 0U; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = index + 1U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 2U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 3U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 4U; + } + + index += 4U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize - 1U) % 4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_MVEI) */ +/** + @} end of Max group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c new file mode 100644 index 0000000..fed900b --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q31.c @@ -0,0 +1,202 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_q31.c + * Description: Maximum value of a Q31 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a Q31 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_max_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + int32_t blkCnt; /* loop counters */ + q31x4_t extremValVec = vdupq_n_s32(Q31_MIN); + q31_t maxValue = Q31_MIN; + uint32x4_t indexVec; + uint32x4_t extremIdxVec; + mve_pred16_t p0; + uint32_t extremIdxArr[4]; + + indexVec = vidupq_u32(0U, 1); + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp32q(blkCnt); + q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); + + extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); + /* store per-lane extrema indexes */ + vst1q_p_u32(extremIdxArr, indexVec, p0); + + indexVec += 4; + pSrc += 4; + blkCnt -= 4; + } + while (blkCnt > 0); + + + /* Get max value across the vector */ + maxValue = vmaxvq(maxValue, extremValVec); + + /* set index for lower values to max possible index */ + p0 = vcmpgeq(extremValVec, maxValue); + extremIdxVec = vld1q_u32(extremIdxArr); + + indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0); + *pIndex = vminvq(blockSize - 1, indexVec); + *pResult = maxValue; +} + +#else +void arm_max_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + q31_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + +#if defined (ARM_MATH_LOOPUNROLL) + uint32_t index; /* index of maximum value */ +#endif + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#if defined (ARM_MATH_LOOPUNROLL) + /* Initialise index of maximum value. */ + index = 0U; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = index + 1U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 2U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 3U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 4U; + } + + index += 4U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize - 1U) % 4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of Max group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c new file mode 100644 index 0000000..5deae64 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_max_q7.c @@ -0,0 +1,256 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_q7.c + * Description: Maximum value of a Q7 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup Max + @{ + */ + +/** + @brief Maximum value of a Q7 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult maximum value returned here + @param[out] pIndex index of maximum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +static void arm_small_blk_max_q7( + const q7_t * pSrc, + uint16_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + int32_t blkCnt; /* loop counters */ + q7x16_t extremValVec = vdupq_n_s8(Q7_MIN); + q7_t maxValue = Q7_MIN; + uint8x16_t indexVec; + uint8x16_t extremIdxVec; + mve_pred16_t p0; + uint8_t extremIdxArr[16]; + + indexVec = vidupq_u8(0U, 1); + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp8q(blkCnt); + q7x16_t extremIdxVal = vld1q_z_s8(pSrc, p); + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpgeq_m(extremIdxVal, extremValVec, p); + + extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); + /* store per-lane extrema indexes */ + vst1q_p_u8(extremIdxArr, indexVec, p0); + + indexVec += 16; + pSrc += 16; + blkCnt -= 16; + } + while (blkCnt > 0); + + + /* Get max value across the vector */ + maxValue = vmaxvq(maxValue, extremValVec); + + /* set index for lower values to max possible index */ + p0 = vcmpgeq(extremValVec, maxValue); + extremIdxVec = vld1q_u8(extremIdxArr); + + indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0); + *pIndex = vminvq_u8(blockSize - 1, indexVec); + *pResult = maxValue; +} + +void arm_max_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + int32_t totalSize = blockSize; + const uint16_t sub_blk_sz = UINT8_MAX + 1; + + if (totalSize <= sub_blk_sz) + { + arm_small_blk_max_q7(pSrc, blockSize, pResult, pIndex); + } + else + { + uint32_t curIdx = 0; + q7_t curBlkExtr = Q7_MIN; + uint32_t curBlkPos = 0; + uint32_t curBlkIdx = 0; + /* + * process blocks of 255 elts + */ + while (totalSize >= sub_blk_sz) + { + const q7_t *curSrc = pSrc; + + arm_small_blk_max_q7(curSrc, sub_blk_sz, pResult, pIndex); + if (*pResult > curBlkExtr) + { + /* + * update partial extrema + */ + curBlkExtr = *pResult; + curBlkPos = *pIndex; + curBlkIdx = curIdx; + } + curIdx++; + pSrc += sub_blk_sz; + totalSize -= sub_blk_sz; + } + /* + * remainder + */ + arm_small_blk_max_q7(pSrc, totalSize, pResult, pIndex); + if (*pResult > curBlkExtr) + { + curBlkExtr = *pResult; + curBlkPos = *pIndex; + curBlkIdx = curIdx; + } + *pIndex = curBlkIdx * sub_blk_sz + curBlkPos; + *pResult = curBlkExtr; + } +} +#else +void arm_max_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + q7_t maxVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + +#if defined (ARM_MATH_LOOPUNROLL) + uint32_t index; /* index of maximum value */ +#endif + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#if defined (ARM_MATH_LOOPUNROLL) + /* Initialise index of maximum value. */ + index = 0U; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = index + 1U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 2U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 3U; + } + + maxVal = *pSrc++; + if (out < maxVal) + { + out = maxVal; + outIndex = index + 4U; + } + + index += 4U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize - 1U) % 4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal) + { + /* Update the maximum value and it's index */ + out = maxVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of Max group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c new file mode 100644 index 0000000..d296482 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f16.c @@ -0,0 +1,152 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mean_f16.c + * Description: Mean value of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + + +/** + @ingroup groupStats + */ + +/** + @defgroup mean Mean + + Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector. + The underlying algorithm is used: + + <pre> + Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize; + </pre> + + There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + @addtogroup mean + @{ + */ + +/** + @brief Mean value of a floating-point vector. + @param[in] pSrc points to the input vector. + @param[in] blockSize number of samples in input vector. + @param[out] pResult mean value returned here. + @return none + */ +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_mean_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + f16x8_t vecSrc; + f16x8_t sumVec = vdupq_n_f16(0.0f16); + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp16q(blkCnt); + + vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p); + sumVec = vaddq_m_f16(sumVec, sumVec, vecSrc, p); + + blkCnt -= 8; + pSrc += 8; + } + while (blkCnt > 0); + + *pResult = (_Float16)vecAddAcrossF16Mve(sumVec) / (_Float16) blockSize; +} + + +#else + +void arm_mean_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + float16_t sum = 0.0f; /* Temporary result storage */ + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += (_Float16)*pSrc++; + + sum += (_Float16)*pSrc++; + + sum += (_Float16)*pSrc++; + + sum += (_Float16)*pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += (_Float16)*pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store result to destination */ + *pResult = ((_Float16)sum / (_Float16)blockSize); +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of mean group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c new file mode 100644 index 0000000..dd6d817 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f32.c @@ -0,0 +1,198 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mean_f32.c + * Description: Mean value of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup mean + @{ + */ + +/** + @brief Mean value of a floating-point vector. + @param[in] pSrc points to the input vector. + @param[in] blockSize number of samples in input vector. + @param[out] pResult mean value returned here. + @return none + */ +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_mean_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + f32x4_t vecSrc; + f32x4_t sumVec = vdupq_n_f32(0.0f); + float32_t sum = 0.0f; + + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + while (blkCnt > 0U) + { + vecSrc = vldrwq_f32(pSrc); + sumVec = vaddq_f32(sumVec, vecSrc); + + blkCnt --; + pSrc += 4; + } + + sum = vecAddAcrossF32Mve(sumVec); + + /* Tail */ + blkCnt = blockSize & 0x3; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } + + *pResult = sum / (float32_t) blockSize; +} + + +#else +#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_mean_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* Temporary result storage */ + float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */ + float32x2_t sumV2; + + uint32_t blkCnt; /* Loop counter */ + + float32x4_t inV; + + blkCnt = blockSize >> 2U; + + /* Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + inV = vld1q_f32(pSrc); + sumV = vaddq_f32(sumV, inV); + + pSrc += 4; + /* Decrement the loop counter */ + blkCnt--; + } + + sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV)); + sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1); + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize & 3; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = sum / (float32_t) blockSize; +} +#else +void arm_mean_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + float32_t sum = 0.0f; /* Temporary result storage */ + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + sum += *pSrc++; + + sum += *pSrc++; + + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store result to destination */ + *pResult = (sum / blockSize); +} +#endif /* #if defined(ARM_MATH_NEON) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of mean group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f64.c new file mode 100644 index 0000000..72e6f35 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_f64.c @@ -0,0 +1,75 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mean_f64.c + * Description: Mean value of a floating-point vector + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup mean + @{ + */ + +/** + @brief Mean value of a floating-point vector. + @param[in] pSrc points to the input vector. + @param[in] blockSize number of samples in input vector. + @param[out] pResult mean value returned here. + @return none + */ +void arm_mean_f64( + const float64_t * pSrc, + uint32_t blockSize, + float64_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + float64_t sum = 0.; /* Temporary result storage */ + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store result to destination */ + *pResult = (sum / blockSize); +} + +/** + @} end of mean group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c new file mode 100644 index 0000000..54949a1 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q15.c @@ -0,0 +1,156 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mean_q15.c + * Description: Mean value of a Q15 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup mean + @{ + */ + +/** + @brief Mean value of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult mean value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using a 32-bit internal accumulator. + The input is represented in 1.15 format and is accumulated in a 32-bit + accumulator in 17.15 format. + There is no risk of internal overflow with this approach, and the + full precision of intermediate result is preserved. + Finally, the accumulator is truncated to yield a result of 1.15 format. + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_mean_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q15x8_t vecSrc; + q31_t sum = 0L; + + /* Compute 8 outputs at a time */ + blkCnt = blockSize >> 3U; + while (blkCnt > 0U) + { + vecSrc = vldrhq_s16(pSrc); + /* + * sum lanes + */ + sum = vaddvaq(sum, vecSrc); + + blkCnt--; + pSrc += 8; + } + + /* Tail */ + blkCnt = blockSize & 0x7; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = (q15_t) (sum / (int32_t) blockSize); +} +#else +void arm_mean_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q31_t sum = 0; /* Temporary result storage */ + +#if defined (ARM_MATH_LOOPUNROLL) + q31_t in; +#endif + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in = read_q15x2_ia (&pSrc); + sum += ((in << 16U) >> 16U); + sum += (in >> 16U); + + in = read_q15x2_ia (&pSrc); + sum += ((in << 16U) >> 16U); + sum += (in >> 16U); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store result to destination */ + *pResult = (q15_t) (sum / (int32_t) blockSize); +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of mean group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c new file mode 100644 index 0000000..b33ed00 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q31.c @@ -0,0 +1,149 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mean_q31.c + * Description: Mean value of a Q31 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup mean + @{ + */ + +/** + @brief Mean value of a Q31 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult mean value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using a 64-bit internal accumulator. + The input is represented in 1.31 format and is accumulated in a 64-bit + accumulator in 33.31 format. + There is no risk of internal overflow with this approach, and the + full precision of intermediate result is preserved. + Finally, the accumulator is truncated to yield a result of 1.31 format. + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_mean_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q31x4_t vecSrc; + q63_t sum = 0LL; + + + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + while (blkCnt > 0U) + { + + vecSrc = vldrwq_s32(pSrc); + /* + * sum lanes + */ + sum = vaddlvaq(sum, vecSrc); + + blkCnt --; + pSrc += 4; + } + + /* Tail */ + blkCnt = blockSize & 0x3; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + blkCnt --; + } + + *pResult = arm_div_q63_to_q31(sum, blockSize); +} +#else +void arm_mean_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q63_t sum = 0; /* Temporary result storage */ + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + sum += *pSrc++; + + sum += *pSrc++; + + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store result to destination */ + *pResult = (q31_t) (sum / blockSize); +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of mean group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c new file mode 100644 index 0000000..f0701eb --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mean_q7.c @@ -0,0 +1,153 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mean_q7.c + * Description: Mean value of a Q7 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup mean + @{ + */ + +/** + @brief Mean value of a Q7 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult mean value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using a 32-bit internal accumulator. + The input is represented in 1.7 format and is accumulated in a 32-bit + accumulator in 25.7 format. + There is no risk of internal overflow with this approach, and the + full precision of intermediate result is preserved. + Finally, the accumulator is truncated to yield a result of 1.7 format. + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_mean_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q7x16_t vecSrc; + q31_t sum = 0L; + + + blkCnt = blockSize >> 4; + while (blkCnt > 0U) + { + vecSrc = vldrbq_s8(pSrc); + /* + * sum lanes + */ + sum = vaddvaq(sum, vecSrc); + + blkCnt--; + pSrc += 16; + } + + blkCnt = blockSize & 0xF; + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = (q7_t) (sum / (int32_t) blockSize); +} +#else +void arm_mean_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q31_t sum = 0; /* Temporary result storage */ + +#if defined (ARM_MATH_LOOPUNROLL) + q31_t in; +#endif + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in = read_q7x4_ia (&pSrc); + sum += ((in << 24U) >> 24U); + sum += ((in << 16U) >> 24U); + sum += ((in << 8U) >> 24U); + sum += (in >> 24U); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store result to destination */ + *pResult = (q7_t) (sum / (int32_t) blockSize); +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of mean group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c new file mode 100644 index 0000000..3ff9db4 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f16.c @@ -0,0 +1,240 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_f16.c + * Description: Minimum value of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + + +/** + @ingroup groupStats + */ + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_min_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + int32_t blkCnt; /* loop counters */ + f16x8_t vecSrc; + float16_t const *pSrcVec; + f16x8_t curExtremValVec = vdupq_n_f16(F16_MAX); + float16_t minValue = F16_MAX; + uint32_t idx = blockSize; + uint16x8_t indexVec; + uint16x8_t curExtremIdxVec; + mve_pred16_t p0; + + indexVec = vidupq_u16((uint32_t)0, 1); + curExtremIdxVec = vdupq_n_u16(0); + + pSrcVec = (float16_t const *) pSrc; + blkCnt = blockSize >> 3; + while (blkCnt > 0) + { + vecSrc = vldrhq_f16(pSrcVec); pSrcVec += 8; + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = indexVec + 8; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 7; + if (blkCnt > 0) + { + vecSrc = vldrhq_f16(pSrcVec); pSrcVec += 8; + p0 = vctp16q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq_m(vecSrc, curExtremValVec, p0); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + } + /* + * Get min value across the vector + */ + minValue = vminnmvq(minValue, curExtremValVec); + /* + * set index for lower values to min possible index + */ + p0 = vcmpleq(curExtremValVec, minValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0); + /* + * Get min index which is thus for a min value + */ + idx = vminvq(idx, indexVec); + /* + * Save result + */ + *pIndex = idx; + *pResult = minValue; +} + +#else + +void arm_min_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult, + uint32_t * pIndex) +{ + float16_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + uint32_t index; /* index of maximum value */ +#endif + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + /* Initialise index of maximum value. */ + index = 0U; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize minVal to next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if ((_Float16)out > (_Float16)minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = index + 1U; + } + + minVal = *pSrc++; + if ((_Float16)out > (_Float16)minVal) + { + out = minVal; + outIndex = index + 2U; + } + + minVal = *pSrc++; + if ((_Float16)out > (_Float16)minVal) + { + out = minVal; + outIndex = index + 3U; + } + + minVal = *pSrc++; + if ((_Float16)out > (_Float16)minVal) + { + out = minVal; + outIndex = index + 4U; + } + + index += 4U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize - 1U) % 4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if ((_Float16)out > (_Float16)minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of Min group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c new file mode 100644 index 0000000..ad8a472 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f32.c @@ -0,0 +1,363 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_f32.c + * Description: Minimum value of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + + +/** + @ingroup groupStats + */ + +/** + @defgroup Min Minimum + + Computes the minimum value of an array of data. + The function returns both the minimum value and its position within the array. + There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_min_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + uint32_t blkCnt; /* loop counters */ + f32x4_t vecSrc; + float32_t const *pSrcVec; + f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX); + float32_t minValue = F32_MAX; + uint32_t idx = blockSize; + uint32x4_t indexVec; + uint32x4_t curExtremIdxVec; + float32_t tmp; + mve_pred16_t p0; + + indexVec = vidupq_u32((uint32_t)0, 1); + curExtremIdxVec = vdupq_n_u32(0); + + pSrcVec = (float32_t const *) pSrc; + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + while (blkCnt > 0U) + { + vecSrc = vldrwq_f32(pSrcVec); + pSrcVec += 4; + /* + * Get current max per lane and current index per lane + * when a max is selected + */ + p0 = vcmpleq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = indexVec + 4; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* + * Get min value across the vector + */ + minValue = vminnmvq(minValue, curExtremValVec); + /* + * set index for lower values to max possible index + */ + p0 = vcmpleq(curExtremValVec, minValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0); + /* + * Get min index which is thus for a max value + */ + idx = vminvq(idx, indexVec); + + /* + * tail + */ + blkCnt = blockSize & 0x3; + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + tmp = *pSrc++; + + /* compare for the minimum value */ + if (minValue > tmp) + { + /* Update the minimum value and it's index */ + minValue = tmp; + idx = blockSize - blkCnt; + } + blkCnt--; + } + /* + * Save result + */ + *pIndex = idx; + *pResult = minValue; +} + +#else +#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_min_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + float32_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + float32x4_t outV, srcV; + float32x2_t outV2; + + uint32x4_t idxV; + static const uint32_t indexInit[4]={4,5,6,7}; + static const uint32_t countVInit[4]={0,1,2,3}; + uint32x4_t maxIdx; + uint32x4_t index; + uint32x4_t delta; + uint32x4_t countV; + uint32x2_t countV2; + + maxIdx = vdupq_n_u32(ULONG_MAX); + delta = vdupq_n_u32(4); + index = vld1q_u32(indexInit); + countV = vld1q_u32(countVInit); + + /* Initialise the index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparison */ + if (blockSize <= 3) + { + out = *pSrc++; + + blkCnt = blockSize - 1; + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out > maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + } + } + else + { + outV = vld1q_f32(pSrc); + pSrc += 4; + + /* Compute 4 outputs at a time */ + blkCnt = (blockSize - 4 ) >> 2U; + + while (blkCnt > 0U) + { + srcV = vld1q_f32(pSrc); + pSrc += 4; + + idxV = vcltq_f32(srcV, outV); + outV = vbslq_f32(idxV, srcV, outV ); + countV = vbslq_u32(idxV, index,countV ); + + index = vaddq_u32(index,delta); + + /* Decrement the loop counter */ + blkCnt--; + } + + outV2 = vpmin_f32(vget_low_f32(outV),vget_high_f32(outV)); + outV2 = vpmin_f32(outV2,outV2); + out = vget_lane_f32(outV2,0); + + idxV = vceqq_f32(outV, vdupq_n_f32(out)); + countV = vbslq_u32(idxV, countV,maxIdx); + + countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV)); + countV2 = vpmin_u32(countV2,countV2); + outIndex = vget_lane_u32(countV2,0); + + /* if (blockSize - 1U) is not multiple of 4 */ + blkCnt = (blockSize - 4 ) % 4U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out > maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt ; + } + + /* Decrement the loop counter */ + blkCnt--; + } + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#else +void arm_min_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ + float32_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + uint32_t index; /* index of maximum value */ +#endif + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + /* Initialise index of maximum value. */ + index = 0U; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize minVal to next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = index + 1U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 2U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 3U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 4U; + } + + index += 4U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize - 1U) % 4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* #if defined(ARM_MATH_NEON) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of Min group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f64.c new file mode 100644 index 0000000..ef25f92 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_f64.c @@ -0,0 +1,90 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_f64.c + * Description: Minimum value of a floating-point vector + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ +void arm_min_f64( + const float64_t * pSrc, + uint32_t blockSize, + float64_t * pResult, + uint32_t * pIndex) +{ + float64_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + + /* Initialise index value to zero. */ + outIndex = 0U; + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + @} end of Min group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f16.c new file mode 100644 index 0000000..5b4c0d6 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f16.c @@ -0,0 +1,144 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_no_idx_f16.c + * Description: Minimum value of a floating-point vector without returning the index + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_min_no_idx_f16( + const float16_t *pSrc, + uint32_t blockSize, + float16_t *pResult) +{ + f16x8_t vecSrc; + f16x8_t curExtremValVec = vdupq_n_f16(F16_MAX); + float16_t minValue = F16_MAX; + float16_t newVal; + uint32_t blkCnt; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 3U; + + while (blkCnt > 0U) + { + + vecSrc = vldrhq_f16(pSrc); + /* + * update per-lane min. + */ + curExtremValVec = vminnmq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pSrc += 8; + blkCnt --; + } + /* + * Get min value across the vector + */ + minValue = vminnmvq(minValue, curExtremValVec); + + blkCnt = blockSize & 7; + + while (blkCnt > 0U) + { + newVal = *pSrc++; + + /* compare for the minimum value */ + if ((_Float16)minValue > (_Float16)newVal) + { + /* Update the minimum value and it's index */ + minValue = newVal; + } + + blkCnt --; + } + + *pResult = minValue; +} + +#else + +void arm_min_no_idx_f16( + const float16_t *pSrc, + uint32_t blockSize, + float16_t *pResult) +{ + float16_t minValue = F16_MAX; + float16_t newVal; + + while (blockSize > 0U) + { + newVal = *pSrc++; + + /* compare for the minimum value */ + if ((_Float16)minValue > (_Float16)newVal) + { + /* Update the minimum value and it's index */ + minValue = newVal; + } + + blockSize --; + } + + *pResult = minValue; +} + +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of Min group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f32.c new file mode 100644 index 0000000..ea7c137 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f32.c @@ -0,0 +1,138 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_no_idx_f32.c + * Description: Minimum value of a floating-point vector without returning the index + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include <limits.h> +#endif + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_min_no_idx_f32( + const float32_t *pSrc, + uint32_t blockSize, + float32_t *pResult) +{ + f32x4_t vecSrc; + f32x4_t curExtremValVec = vdupq_n_f32(F32_MAX); + float32_t minValue = F32_MAX; + float32_t newVal; + uint32_t blkCnt; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + + vecSrc = vldrwq_f32(pSrc); + /* + * update per-lane min. + */ + curExtremValVec = vminnmq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + * Advance vector source and destination pointers + */ + pSrc += 4; + blkCnt --; + } + /* + * Get min value across the vector + */ + minValue = vminnmvq(minValue, curExtremValVec); + + blkCnt = blockSize & 3; + + while (blkCnt > 0U) + { + newVal = *pSrc++; + + /* compare for the minimum value */ + if (minValue > newVal) + { + /* Update the minimum value and it's index */ + minValue = newVal; + } + + blkCnt --; + } + + *pResult = minValue; +} + +#else + +void arm_min_no_idx_f32( + const float32_t *pSrc, + uint32_t blockSize, + float32_t *pResult) +{ + float32_t minValue = F32_MAX; + float32_t newVal; + + while (blockSize > 0U) + { + newVal = *pSrc++; + + /* compare for the minimum value */ + if (minValue > newVal) + { + /* Update the minimum value and it's index */ + minValue = newVal; + } + + blockSize --; + } + + *pResult = minValue; +} + +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of Min group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f64.c new file mode 100644 index 0000000..6e69572 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_f64.c @@ -0,0 +1,75 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_no_idx_f64.c + * Description: Maximum value of a floating-point vector without returning the index + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Min + @{ + */ + +/** + @brief Maximum value of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ +void arm_min_no_idx_f64( + const float64_t *pSrc, + uint32_t blockSize, + float64_t *pResult) +{ + float64_t minValue = F64_MAX; + float64_t newVal; + + while (blockSize > 0U) + { + newVal = *pSrc++; + + /* compare for the minimum value */ + if (minValue > newVal) + { + /* Update the minimum value and it's index */ + minValue = newVal; + } + + blockSize --; + } + + *pResult = minValue; +} + +/** + @} end of Min group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q15.c new file mode 100644 index 0000000..207f9ea --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q15.c @@ -0,0 +1,142 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_no_idx_q15.c + * Description: Minimum value of a q15 vector without returning the index + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + + +/** + @ingroup groupStats + */ + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a q15 vector without index. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_min_no_idx_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + q15x8_t vecSrc; + q15_t const *pSrcVec; + q15x8_t curExtremValVec = vdupq_n_s16(Q15_MAX); + q15_t minValue = Q15_MAX; + mve_pred16_t p0; + + + pSrcVec = (q15_t const *) pSrc; + blkCnt = blockSize >> 3; + while (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + /* + * update per-lane min. + */ + curExtremValVec = vminq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 7; + if (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 8; + p0 = vctp16q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + curExtremValVec = vminq_m(curExtremValVec, vecSrc, curExtremValVec, p0); + } + /* + * Get min value across the vector + */ + minValue = vminvq(minValue, curExtremValVec); + *pResult = minValue; +} + +#else +void arm_min_no_idx_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q15_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* loop counter */ + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value */ + out = minVal1; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the minimum value into destination pointer */ + *pResult = out; +} + +#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of Min group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q31.c new file mode 100644 index 0000000..e425db4 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q31.c @@ -0,0 +1,141 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_no_idx_q31.c + * Description: Minimum value of a q31 vector without returning the index + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + + +/** + @ingroup groupStats + */ + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a q31 vector without index. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" +void arm_min_no_idx_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + q31x4_t vecSrc; + q31_t const *pSrcVec; + q31x4_t curExtremValVec = vdupq_n_s32(Q31_MAX); + q31_t minValue = Q31_MAX; + mve_pred16_t p0; + + + pSrcVec = (q31_t const *) pSrc; + blkCnt = blockSize >> 2; + while (blkCnt > 0) + { + vecSrc = vldrwq_s32(pSrcVec); + pSrcVec += 4; + /* + * update per-lane min. + */ + curExtremValVec = vminq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 3; + if (blkCnt > 0) + { + vecSrc = vldrwq_s32(pSrcVec); + pSrcVec += 4; + p0 = vctp32q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + curExtremValVec = vminq_m(curExtremValVec, vecSrc, curExtremValVec, p0); + } + /* + * Get min value across the vector + */ + minValue = vminvq(minValue, curExtremValVec); + *pResult = minValue; +} + +#else +void arm_min_no_idx_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q31_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* loop counter */ + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value */ + out = minVal1; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the minimum value into destination pointer */ + *pResult = out; +} + +#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of Min group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q7.c new file mode 100644 index 0000000..2ee9a99 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_no_idx_q7.c @@ -0,0 +1,141 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_no_idx_q7.c + * Description: Minimum value of a q7 vector without returning the index + * + * $Date: 16 November 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + + +/** + @ingroup groupStats + */ + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a q7 vector without index. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_min_no_idx_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + q7x16_t vecSrc; + q7_t const *pSrcVec; + q7x16_t curExtremValVec = vdupq_n_s8(Q7_MAX); + q7_t minValue = Q7_MAX; + mve_pred16_t p0; + + + pSrcVec = (q7_t const *) pSrc; + blkCnt = blockSize >> 4; + while (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 16; + /* + * update per-lane min. + */ + curExtremValVec = vminq(vecSrc, curExtremValVec); + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* + * tail + * (will be merged thru tail predication) + */ + blkCnt = blockSize & 0xF; + if (blkCnt > 0) + { + vecSrc = vld1q(pSrcVec); + pSrcVec += 16; + p0 = vctp8q(blkCnt); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + curExtremValVec = vminq_m(curExtremValVec, vecSrc, curExtremValVec, p0); + } + /* + * Get min value across the vector + */ + minValue = vminvq(minValue, curExtremValVec); + *pResult = minValue; +} + +#else +void arm_min_no_idx_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + q7_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt; /* loop counter */ + + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value */ + out = minVal1; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the minimum value into destination pointer */ + *pResult = out; +} + +#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */ +/** + @} end of Min group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c new file mode 100644 index 0000000..f31019d --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q15.c @@ -0,0 +1,203 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_q15.c + * Description: Minimum value of a Q15 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_min_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + + int32_t blkCnt; /* loop counters */ + q15x8_t extremValVec = vdupq_n_s16(Q15_MAX); + q15_t minValue = Q15_MAX; + uint16x8_t indexVec; + uint16x8_t extremIdxVec; + mve_pred16_t p0; + uint16_t extremIdxArr[8]; + + indexVec = vidupq_u16(0U, 1); + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp16q(blkCnt); + q15x8_t extremIdxVal = vld1q_z_s16(pSrc, p); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq_m(extremIdxVal, extremValVec, p); + + extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); + /* store per-lane extrema indexes */ + vst1q_p_u16(extremIdxArr, indexVec, p0); + + indexVec += 8; + pSrc += 8; + blkCnt -= 8; + } + while (blkCnt > 0); + + /* Get min value across the vector */ + minValue = vminvq(minValue, extremValVec); + + /* set index for lower values to min possible index */ + p0 = vcmpleq(extremValVec, minValue); + extremIdxVec = vld1q_u16(extremIdxArr); + + indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0); + *pIndex = vminvq(blockSize - 1, indexVec); + *pResult = minValue; + +} +#else +void arm_min_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ + q15_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + +#if defined (ARM_MATH_LOOPUNROLL) + uint32_t index; /* index of maximum value */ +#endif + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#if defined (ARM_MATH_LOOPUNROLL) + /* Initialise index of maximum value. */ + index = 0U; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize minVal to next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = index + 1U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 2U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 3U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 4U; + } + + index += 4U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize - 1U) % 4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of Min group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c new file mode 100644 index 0000000..c993004 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q31.c @@ -0,0 +1,203 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_q31.c + * Description: Minimum value of a Q31 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a Q31 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_min_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + int32_t blkCnt; /* loop counters */ + q31x4_t extremValVec = vdupq_n_s32(Q31_MAX); + q31_t minValue = Q31_MAX; + uint32x4_t indexVec; + uint32x4_t extremIdxVec; + mve_pred16_t p0; + uint32_t extremIdxArr[4]; + + indexVec = vidupq_u32(0U, 1); + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp32q(blkCnt); + q31x4_t extremIdxVal = vld1q_z_s32(pSrc, p); + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq_m(extremIdxVal, extremValVec, p); + + extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0); + /* store per-lane extrema indexes */ + vst1q_p_u32(extremIdxArr, indexVec, p0); + + indexVec += 4; + pSrc += 4; + blkCnt -= 4; + } + while (blkCnt > 0); + + + /* Get min value across the vector */ + minValue = vminvq(minValue, extremValVec); + + /* set index for lower values to min possible index */ + p0 = vcmpleq(extremValVec, minValue); + extremIdxVec = vld1q_u32(extremIdxArr); + + indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0); + *pIndex = vminvq(blockSize - 1, indexVec); + *pResult = minValue; +} + +#else +void arm_min_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ + q31_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + +#if defined (ARM_MATH_LOOPUNROLL) + uint32_t index; /* index of maximum value */ +#endif + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#if defined (ARM_MATH_LOOPUNROLL) + /* Initialise index of maximum value. */ + index = 0U; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize minVal to next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = index + 1U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 2U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 3U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 4U; + } + + index += 4U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize - 1U) % 4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of Min group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c new file mode 100644 index 0000000..3e5aae5 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_min_q7.c @@ -0,0 +1,284 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_q7.c + * Description: Minimum value of a Q7 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup Min + @{ + */ + +/** + @brief Minimum value of a Q7 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult minimum value returned here + @param[out] pIndex index of minimum value returned here + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +static void arm_small_blk_min_q7( + const q7_t * pSrc, + uint8_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + uint32_t blkCnt; /* loop counters */ + q7x16_t vecSrc; + q7x16_t curExtremValVec = vdupq_n_s8(Q7_MAX); + q7_t minValue = Q7_MAX,temp; + uint32_t idx = blockSize; + uint8x16_t indexVec; + uint8x16_t curExtremIdxVec; + mve_pred16_t p0; + + + indexVec = vidupq_u8((uint32_t)0, 1); + curExtremIdxVec = vdupq_n_u8(0); + + blkCnt = blockSize >> 4; + while (blkCnt > 0U) + { + vecSrc = vldrbq_s8(pSrc); + pSrc += 16; + /* + * Get current min per lane and current index per lane + * when a min is selected + */ + p0 = vcmpleq(vecSrc, curExtremValVec); + curExtremValVec = vpselq(vecSrc, curExtremValVec, p0); + curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0); + + indexVec = indexVec + 16; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* + * Get min value across the vector + */ + minValue = vminvq(minValue, curExtremValVec); + /* + * set index for lower values to min possible index + */ + p0 = vcmpleq(curExtremValVec, minValue); + indexVec = vpselq(curExtremIdxVec, vdupq_n_u8(blockSize), p0); + /* + * Get min index which is thus for a min value + */ + idx = vminvq(idx, indexVec); + + blkCnt = blockSize & 0xF; + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + temp = *pSrc++; + + /* compare for the minimum value */ + if (minValue > temp) + { + /* Update the minimum value and it's index */ + minValue = temp; + idx = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + /* + * Save result + */ + *pIndex = idx; + *pResult = minValue; +} + +void arm_min_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + int32_t totalSize = blockSize; + + if (totalSize <= UINT8_MAX) + { + arm_small_blk_min_q7(pSrc, blockSize, pResult, pIndex); + } + else + { + uint32_t curIdx = 0; + q7_t curBlkExtr = Q7_MAX; + uint32_t curBlkPos = 0; + uint32_t curBlkIdx = 0; + /* + * process blocks of 255 elts + */ + while (totalSize >= UINT8_MAX) + { + const q7_t *curSrc = pSrc; + + arm_small_blk_min_q7(curSrc, UINT8_MAX, pResult, pIndex); + if (*pResult < curBlkExtr) + { + /* + * update partial extrema + */ + curBlkExtr = *pResult; + curBlkPos = *pIndex; + curBlkIdx = curIdx; + } + curIdx++; + pSrc += UINT8_MAX; + totalSize -= UINT8_MAX; + } + /* + * remainder + */ + arm_small_blk_min_q7(pSrc, totalSize, pResult, pIndex); + if (*pResult < curBlkExtr) + { + curBlkExtr = *pResult; + curBlkPos = *pIndex; + curBlkIdx = curIdx; + } + *pIndex = curBlkIdx * UINT8_MAX + curBlkPos; + *pResult = curBlkExtr; + } +} +#else +void arm_min_q7( + const q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ + q7_t minVal, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* Loop counter */ + +#if defined (ARM_MATH_LOOPUNROLL) + uint32_t index; /* index of maximum value */ +#endif + + /* Initialise index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#if defined (ARM_MATH_LOOPUNROLL) + /* Initialise index of maximum value. */ + index = 0U; + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize minVal to next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = index + 1U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 2U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 3U; + } + + minVal = *pSrc++; + if (out > minVal) + { + out = minVal; + outIndex = index + 4U; + } + + index += 4U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize - 1U) % 4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal) + { + /* Update the minimum value and it's index */ + out = minVal; + outIndex = blockSize - blkCnt; + } + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of Min group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f16.c new file mode 100644 index 0000000..3b192dd --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f16.c @@ -0,0 +1,203 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mse_f16.c + * Description: Half floating point mean square error + * + * $Date: 05 April 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup MSE + @{ + */ + +/** + @brief Mean square error between two half floating point vectors. + @param[in] pSrcA points to the first input vector + @param[in] pSrcB points to the second input vector + @param[in] blockSize number of samples in input vector + @param[out] result mean square error + @return none + */ + +#if !defined(ARM_MATH_AUTOVECTORIZE) + +#if defined(ARM_MATH_MVE_FLOAT16) +#include "arm_helium_utils.h" + +void arm_mse_f16( + const float16_t * pSrcA, + const float16_t * pSrcB, + uint32_t blockSize, + float16_t * result) + +{ + float16x8_t vecA, vecB; + float16x8_t vecSum; + uint32_t blkCnt; + _Float16 sum = 0.0f16; + vecSum = vdupq_n_f16(0.0f16); + + blkCnt = (blockSize) >> 3; + while (blkCnt > 0U) + { + vecA = vld1q(pSrcA); + pSrcA += 8; + + vecB = vld1q(pSrcB); + pSrcB += 8; + + vecA = vsubq(vecA, vecB); + + vecSum = vfmaq(vecSum, vecA, vecA); + /* + * Decrement the blockSize loop counter + */ + blkCnt --; + } + + + blkCnt = (blockSize) & 7; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp16q(blkCnt); + vecA = vld1q(pSrcA); + vecB = vld1q(pSrcB); + + vecA = vsubq(vecA, vecB); + vecSum = vfmaq_m(vecSum, vecA, vecA, p0); + } + + sum = vecAddAcrossF16Mve(vecSum); + + /* Store result in destination buffer */ + *result = (_Float16)sum / (_Float16)blockSize; + +} + +#endif + + +#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/ + + +#if defined(ARM_FLOAT16_SUPPORTED) + +#if (!defined(ARM_MATH_MVE_FLOAT16)) || defined(ARM_MATH_AUTOVECTORIZE) + + + +void arm_mse_f16( + const float16_t * pSrcA, + const float16_t * pSrcB, + uint32_t blockSize, + float16_t * result) + +{ + uint32_t blkCnt; /* Loop counter */ + _Float16 inA, inB; + _Float16 sum = 0.0f16; /* Temporary return variable */ +#if defined (ARM_MATH_LOOPUNROLL) + blkCnt = (blockSize) >> 3; + + + while (blkCnt > 0U) + { + inA = *pSrcA++; + inB = *pSrcB++; + inA = (_Float16)inA - (_Float16)inB; + sum += (_Float16)inA * (_Float16)inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = (_Float16)inA - (_Float16)inB; + sum += (_Float16)inA * (_Float16)inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = (_Float16)inA - (_Float16)inB; + sum += (_Float16)inA * (_Float16)inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = (_Float16)inA - (_Float16)inB; + sum += (_Float16)inA * (_Float16)inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = (_Float16)inA - (_Float16)inB; + sum += (_Float16)inA * (_Float16)inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = (_Float16)inA - (_Float16)inB; + sum += (_Float16)inA * (_Float16)inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = (_Float16)inA - (_Float16)inB; + sum += (_Float16)inA * (_Float16)inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = (_Float16)inA - (_Float16)inB; + sum += (_Float16)inA * (_Float16)inA; + + /* Decrement loop counter */ + blkCnt--; + } + + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize) & 7; +#else + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; +#endif + while (blkCnt > 0U) + { + inA = *pSrcA++; + inB = *pSrcB++; + inA = (_Float16)inA - (_Float16)inB; + sum += (_Float16)inA * (_Float16)inA; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result in destination buffer */ + *result = (_Float16)sum / (_Float16)blockSize; +} + +#endif /* end of test for vector instruction availability */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ +/** + @} end of MSE group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f32.c new file mode 100644 index 0000000..1c592f5 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f32.c @@ -0,0 +1,247 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mse_f32.c + * Description: Floating point mean square error + * + * $Date: 05 April 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup MSE + @{ + */ + +/** + @brief Mean square error between two floating point vectors. + @param[in] pSrcA points to the first input vector + @param[in] pSrcB points to the second input vector + @param[in] blockSize number of samples in input vector + @param[out] result mean square error + @return none + */ + +#if !defined(ARM_MATH_AUTOVECTORIZE) + +#if defined(ARM_MATH_MVEF) +#include "arm_helium_utils.h" + +void arm_mse_f32( + const float32_t * pSrcA, + const float32_t * pSrcB, + uint32_t blockSize, + float32_t * result) + +{ + float32x4_t vecA, vecB; + float32x4_t vecSum; + uint32_t blkCnt; + float32_t sum = 0.0f; + vecSum = vdupq_n_f32(0.0f); + + /* Compute 4 outputs at a time */ + blkCnt = (blockSize) >> 2; + while (blkCnt > 0U) + { + vecA = vld1q(pSrcA); + pSrcA += 4; + + vecB = vld1q(pSrcB); + pSrcB += 4; + + vecA = vsubq(vecA, vecB); + + vecSum = vfmaq(vecSum, vecA, vecA); + /* + * Decrement the blockSize loop counter + */ + blkCnt --; + } + + + blkCnt = (blockSize) & 3; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp32q(blkCnt); + vecA = vld1q(pSrcA); + vecB = vld1q(pSrcB); + + vecA = vsubq(vecA, vecB); + vecSum = vfmaq_m(vecSum, vecA, vecA, p0); + } + + sum = vecAddAcrossF32Mve(vecSum); + + /* Store result in destination buffer */ + *result = sum / blockSize; + +} + +#endif + +#if defined(ARM_MATH_NEON) +void arm_mse_f32( + const float32_t * pSrcA, + const float32_t * pSrcB, + uint32_t blockSize, + float32_t * result) + +{ + float32x4_t vecA, vecB; + float32x4_t vecSum; + uint32_t blkCnt; + float32_t inA, inB; + float32_t sum = 0.0f; + vecSum = vdupq_n_f32(0.0f); +#if !defined(__aarch64__) + f32x2_t tmp = vdup_n_f32(0.0f); +#endif + + /* Compute 4 outputs at a time */ + blkCnt = (blockSize) >> 2; + while (blkCnt > 0U) + { + vecA = vld1q_f32(pSrcA); + pSrcA += 4; + + vecB = vld1q_f32(pSrcB); + pSrcB += 4; + + vecA = vsubq_f32(vecA, vecB); + + vecSum = vfmaq_f32(vecSum, vecA, vecA); + /* + * Decrement the blockSize loop counter + */ + blkCnt --; + } + +#if defined(__aarch64__) + sum = vpadds_f32(vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum))); +#else + tmp = vpadd_f32(vget_low_f32(vecSum), vget_high_f32(vecSum)); + sum = vget_lane_f32(tmp, 0) + vget_lane_f32(tmp, 1); + +#endif + + blkCnt = (blockSize) & 3; + while (blkCnt > 0U) + { + /* Calculate dot product and store result in a temporary buffer. */ + inA = *pSrcA++; + inB = *pSrcB++; + inA = inA - inB; + sum += inA * inA; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result in destination buffer */ + *result = sum / blockSize; + +} +#endif + +#endif /*#if !defined(ARM_MATH_AUTOVECTORIZE)*/ + + + +#if (!defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON)) || defined(ARM_MATH_AUTOVECTORIZE) + + +void arm_mse_f32( + const float32_t * pSrcA, + const float32_t * pSrcB, + uint32_t blockSize, + float32_t * result) + +{ + uint32_t blkCnt; /* Loop counter */ + float32_t inA, inB; + float32_t sum = 0.0f; /* Temporary return variable */ +#if defined (ARM_MATH_LOOPUNROLL) + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = (blockSize) >> 2; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + + inA = *pSrcA++; + inB = *pSrcB++; + inA = inA - inB; + sum += inA * inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = inA - inB; + sum += inA * inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = inA - inB; + sum += inA * inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = inA - inB; + sum += inA * inA; + + /* Decrement loop counter */ + blkCnt--; + } + + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize) & 3; +#else + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; +#endif + while (blkCnt > 0U) + { + inA = *pSrcA++; + inB = *pSrcB++; + inA = inA - inB; + sum += inA * inA; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result in destination buffer */ + *result = sum / blockSize; +} + +#endif /* end of test for vector instruction availability */ + +/** + @} end of MSE group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f64.c new file mode 100644 index 0000000..b785bf8 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_f64.c @@ -0,0 +1,110 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mse_f64.c + * Description: Double floating point mean square error + * + * $Date: 05 April 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup MSE + @{ + */ + +/** + @brief Mean square error between two double floating point vectors. + @param[in] pSrcA points to the first input vector + @param[in] pSrcB points to the second input vector + @param[in] blockSize number of samples in input vector + @param[out] result mean square error + @return none + */ + + + + + +void arm_mse_f64( + const float64_t * pSrcA, + const float64_t * pSrcB, + uint32_t blockSize, + float64_t * result) + +{ + uint32_t blkCnt; /* Loop counter */ + float64_t inA, inB; + float64_t sum = 0.0; /* Temporary return variable */ +#if defined (ARM_MATH_LOOPUNROLL) + blkCnt = (blockSize) >> 1; + + + while (blkCnt > 0U) + { + + + inA = *pSrcA++; + inB = *pSrcB++; + inA = inA - inB; + sum += inA * inA; + + inA = *pSrcA++; + inB = *pSrcB++; + inA = inA - inB; + sum += inA * inA; + + /* Decrement loop counter */ + blkCnt--; + } + + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = (blockSize) & 1; +#else + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; +#endif + while (blkCnt > 0U) + { + inA = *pSrcA++; + inB = *pSrcB++; + inA = inA - inB; + sum += inA * inA; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result in destination buffer */ + *result = sum / blockSize; +} + + +/** + @} end of MSE group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q15.c new file mode 100644 index 0000000..9e6f0c0 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q15.c @@ -0,0 +1,175 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mse_q15.c + * Description: Mean square error between two Q15 vectors + * + * $Date: 04 April 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup MSE + @{ + */ + +/** + @brief Mean square error between two Q15 vectors. + @param[in] pSrcA points to the first input vector + @param[in] pSrcB points to the second input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult mean square error + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_mse_q15( + const q15_t * pSrcA, + const q15_t * pSrcB, + uint32_t blockSize, + q15_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q15x8_t vecSrcA,vecSrcB; + q63_t sum = 0LL; + + blkCnt = blockSize >> 3U; + while (blkCnt > 0U) + { + vecSrcA = vld1q(pSrcA); + vecSrcB = vld1q(pSrcB); + + vecSrcA = vshrq(vecSrcA,1); + vecSrcB = vshrq(vecSrcB,1); + + vecSrcA = vqsubq(vecSrcA,vecSrcB); + /* + * sum lanes + */ + sum = vmlaldavaq(sum, vecSrcA, vecSrcA); + + blkCnt--; + pSrcA += 8; + pSrcB += 8; + } + + /* + * tail + */ + blkCnt = blockSize & 7; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp16q(blkCnt); + vecSrcA = vld1q(pSrcA); + vecSrcB = vld1q(pSrcB); + + vecSrcA = vshrq(vecSrcA,1); + vecSrcB = vshrq(vecSrcB,1); + + vecSrcA = vqsubq(vecSrcA,vecSrcB); + + sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0); + } + + + + *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16); +} +#else +void arm_mse_q15( + const q15_t * pSrcA, + const q15_t * pSrcB, + uint32_t blockSize, + q15_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q63_t sum = 0; /* Temporary result storage */ + q15_t inA,inB; /* Temporary variable to store input value */ + + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + + inA = *pSrcA++ >> 1; + inB = *pSrcB++ >> 1; + inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16); + sum += (q63_t)((q31_t) inA * inA); + + inA = *pSrcA++ >> 1; + inB = *pSrcB++ >> 1; + inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16); + sum += (q63_t)((q31_t) inA * inA); + + inA = *pSrcA++ >> 1; + inB = *pSrcB++ >> 1; + inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16); + sum += (q63_t)((q31_t) inA * inA); + + inA = *pSrcA++ >> 1; + inB = *pSrcB++ >> 1; + inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16); + sum += (q63_t)((q31_t) inA * inA); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + + inA = *pSrcA++ >> 1; + inB = *pSrcB++ >> 1; + inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16); + sum += (q63_t)((q31_t) inA * inA); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result in q15 format */ + *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16); +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of MSE group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q31.c new file mode 100644 index 0000000..e444404 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q31.c @@ -0,0 +1,176 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mse_q31.c + * Description: Mean square error between two Q31 vectors + * + * $Date: 04 April 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + + +/** + @addtogroup MSE + @{ + */ + +/** + @brief Mean square error between two Q31 vectors. + @param[in] pSrcA points to the first input vector + @param[in] pSrcB points to the second input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult mean square error + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_mse_q31( + const q31_t * pSrcA, + const q31_t * pSrcB, + uint32_t blockSize, + q31_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q31x4_t vecSrcA,vecSrcB; + q63_t sum = 0LL; + + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + while (blkCnt > 0U) + { + vecSrcA = vld1q(pSrcA); + vecSrcB = vld1q(pSrcB); + + vecSrcA = vshrq(vecSrcA,1); + vecSrcB = vshrq(vecSrcB,1); + + + vecSrcA = vqsubq(vecSrcA,vecSrcB); + /* + * sum lanes + */ + sum = vrmlaldavhaq(sum, vecSrcA, vecSrcA); + + blkCnt--; + pSrcA += 4; + pSrcB += 4; + } + + /* + * tail + */ + blkCnt = blockSize & 3; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp32q(blkCnt); + vecSrcA = vld1q(pSrcA); + vecSrcB = vld1q(pSrcB); + + vecSrcA = vshrq(vecSrcA,1); + vecSrcB = vshrq(vecSrcB,1); + + vecSrcA = vqsubq(vecSrcA,vecSrcB); + + sum = vrmlaldavhaq_p(sum, vecSrcA, vecSrcA, p0); + } + + + *pResult = (q31_t) ((sum / blockSize)>>21); + +} +#else +void arm_mse_q31( + const q31_t * pSrcA, + const q31_t * pSrcB, + uint32_t blockSize, + q31_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q63_t sum = 0; /* Temporary result storage */ + + q31_t inA32,inB32; /* Temporary variable to store packed input value */ + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + inA32 = *pSrcA++ >> 1; + inB32 = *pSrcB++ >> 1; + inA32 = __QSUB(inA32, inB32); + sum += ((q63_t) inA32 * inA32) >> 14U; + + inA32 = *pSrcA++ >> 1; + inB32 = *pSrcB++ >> 1; + inA32 = __QSUB(inA32, inB32); + sum += ((q63_t) inA32 * inA32) >> 14U; + + inA32 = *pSrcA++ >> 1; + inB32 = *pSrcB++ >> 1; + inA32 = __QSUB(inA32, inB32); + sum += ((q63_t) inA32 * inA32) >> 14U; + + inA32 = *pSrcA++ >> 1; + inB32 = *pSrcB++ >> 1; + inA32 = __QSUB(inA32, inB32); + sum += ((q63_t) inA32 * inA32) >> 14U; + + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + inA32 = *pSrcA++ >> 1; + inB32 = *pSrcB++ >> 1; + inA32 = __QSUB(inA32, inB32); + sum += ((q63_t) inA32 * inA32) >> 14U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result in q31 format */ + *pResult = (q31_t) ((sum / blockSize)>>15); +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of MSE group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q7.c new file mode 100644 index 0000000..298e9da --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_mse_q7.c @@ -0,0 +1,179 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mse_q7.c + * Description: Mean square error between two Q7 vectors + * + * $Date: 04 April 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @defgroup MSE Mean Square Error + + Calculates the mean square error between two vectors. + + */ + +/** + @addtogroup MSE + @{ + */ + +/** + @brief Mean square error between two Q7 vectors. + @param[in] pSrcA points to the first input vector + @param[in] pSrcB points to the second input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult mean square error + @return none + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_mse_q7( + const q7_t * pSrcA, + const q7_t * pSrcB, + uint32_t blockSize, + q7_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q7x16_t vecSrcA,vecSrcB; + q31_t sum = 0LL; + + /* Compute 16 outputs at a time */ + blkCnt = blockSize >> 4U; + while (blkCnt > 0U) + { + vecSrcA = vld1q(pSrcA); + vecSrcB = vld1q(pSrcB); + + vecSrcA = vshrq(vecSrcA,1); + vecSrcB = vshrq(vecSrcB,1); + + vecSrcA = vqsubq(vecSrcA,vecSrcB); + /* + * sum lanes + */ + sum = vmladavaq(sum, vecSrcA, vecSrcA); + + blkCnt--; + pSrcA += 16; + pSrcB += 16; + } + + /* + * tail + */ + blkCnt = blockSize & 0xF; + if (blkCnt > 0U) + { + mve_pred16_t p0 = vctp8q(blkCnt); + vecSrcA = vld1q(pSrcA); + vecSrcB = vld1q(pSrcB); + + vecSrcA = vshrq(vecSrcA,1); + vecSrcB = vshrq(vecSrcB,1); + + vecSrcA = vqsubq(vecSrcA,vecSrcB); + + sum = vmladavaq_p(sum, vecSrcA, vecSrcA, p0); + } + + *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8); +} +#else +void arm_mse_q7( + const q7_t * pSrcA, + const q7_t * pSrcB, + uint32_t blockSize, + q7_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q31_t sum = 0; /* Temporary result storage */ + q7_t inA,inB; /* Temporary variable to store input value */ + + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + inA = *pSrcA++ >> 1; + inB = *pSrcB++ >> 1; + inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); + sum += ((q15_t) inA * inA); + + inA = *pSrcA++ >> 1; + inB = *pSrcB++ >> 1; + inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); + sum += ((q15_t) inA * inA); + + inA = *pSrcA++ >> 1; + inB = *pSrcB++ >> 1; + inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); + sum += ((q15_t) inA * inA); + + inA = *pSrcA++ >> 1; + inB = *pSrcB++ >> 1; + inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); + sum += ((q15_t) inA * inA); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + inA = *pSrcA++ >> 1; + inB = *pSrcB++ >> 1; + + inA = (q7_t) __SSAT((q15_t) inA - (q15_t)inB, 8); + sum += ((q15_t) inA * inA); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result in q7 format */ + *pResult = (q7_t) __SSAT((q15_t) (sum / blockSize)>>5, 8);; +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of MSE group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f16.c new file mode 100644 index 0000000..fd135eb --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f16.c @@ -0,0 +1,152 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_power_f16.c + * Description: Sum of the squares of the elements of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + + +/** + @ingroup groupStats + */ + + + +/** + @addtogroup power + @{ + */ + +/** + @brief Sum of the squares of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult sum of the squares value returned here + @return none + */ +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_power_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + f16x8_t vecSrc; + f16x8_t sumVec = vdupq_n_f16(0.0f); + + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp16q(blkCnt); + + vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p); + /* + * sum lanes + */ + sumVec = vfmaq_m(sumVec, vecSrc, vecSrc, p); + + blkCnt -= 8; + pSrc += 8; + } + while (blkCnt > 0); + + *pResult = vecAddAcrossF16Mve(sumVec); +} +#else + +void arm_power_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + _Float16 sum = 0.0f16; /* Temporary result storage */ + _Float16 in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + + in = *pSrc++; + sum += in * in; + + in = *pSrc++; + sum += in * in; + + in = *pSrc++; + sum += in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result to destination */ + *pResult = sum; +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of power group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c new file mode 100644 index 0000000..ce12e57 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f32.c @@ -0,0 +1,229 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_power_f32.c + * Description: Sum of the squares of the elements of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @defgroup power Power + + Calculates the sum of the squares of the elements in the input vector. + The underlying algorithm is used: + + <pre> + Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]; + </pre> + + There are separate functions for floating point, Q31, Q15, and Q7 data types. + + Since the result is not divided by the length, those functions are in fact computing + something which is more an energy than a power. + + */ + +/** + @addtogroup power + @{ + */ + +/** + @brief Sum of the squares of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult sum of the squares value returned here + @return none + */ +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_power_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + f32x4_t vecSrc; + f32x4_t sumVec = vdupq_n_f32(0.0f); + float32_t sum = 0.0f; + float32_t in; + + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + while (blkCnt > 0U) + { + vecSrc = vldrwq_f32(pSrc); + /* + * sum lanes + */ + sumVec = vfmaq(sumVec, vecSrc, vecSrc); + + blkCnt --; + pSrc += 4; + } + sum = vecAddAcrossF32Mve(sumVec); + + /* + * tail + */ + blkCnt = blockSize & 0x3; + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + *pResult = sum; +} +#else +#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_power_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* accumulator */ + float32_t in; /* Temporary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + + float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */ + float32x2_t sumV2; + float32x4_t inV; + + blkCnt = blockSize >> 2U; + + /* Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + inV = vld1q_f32(pSrc); + sumV = vmlaq_f32(sumV, inV, inV); + pSrc += 4; + + /* Decrement the loop counter */ + blkCnt--; + } + sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV)); + sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1); + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* compute power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the result to the destination */ + *pResult = sum; +} +#else +void arm_power_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + float32_t sum = 0.0f; /* Temporary result storage */ + float32_t in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + + in = *pSrc++; + sum += in * in; + + in = *pSrc++; + sum += in * in; + + in = *pSrc++; + sum += in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result to destination */ + *pResult = sum; +} +#endif /* #if defined(ARM_MATH_NEON) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of power group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f64.c new file mode 100644 index 0000000..057e9f7 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_f64.c @@ -0,0 +1,77 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_power_f64.c + * Description: Sum of the squares of the elements of a floating-point vector + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup power + @{ + */ + +/** + @brief Sum of the squares of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult sum of the squares value returned here + @return none + */ +void arm_power_f64( + const float64_t * pSrc, + uint32_t blockSize, + float64_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + float64_t sum = 0.; /* Temporary result storage */ + float64_t in; /* Temporary variable to store input value */ + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result to destination */ + *pResult = sum; +} + +/** + @} end of power group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c new file mode 100644 index 0000000..2c47f0b --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q15.c @@ -0,0 +1,177 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_power_q15.c + * Description: Sum of the squares of the elements of a Q15 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup power + @{ + */ + +/** + @brief Sum of the squares of the elements of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult sum of the squares value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using a 64-bit internal accumulator. + The input is represented in 1.15 format. + Intermediate multiplication yields a 2.30 format, and this + result is added without saturation to a 64-bit accumulator in 34.30 format. + With 33 guard bits in the accumulator, there is no risk of overflow, and the + full precision of the intermediate multiplication is preserved. + Finally, the return result is in 34.30 format. + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_power_q15( + const q15_t * pSrc, + uint32_t blockSize, + q63_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q15x8_t vecSrc; + q63_t sum = 0LL; + q15_t in; + + /* Compute 8 outputs at a time */ + blkCnt = blockSize >> 3U; + while (blkCnt > 0U) + { + vecSrc = vldrhq_s16(pSrc); + /* + * sum lanes + */ + sum = vmlaldavaq(sum, vecSrc, vecSrc); + + blkCnt --; + pSrc += 8; + } + + /* + * tail + */ + blkCnt = blockSize & 0x7; + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q31_t) in * in); + + /* Decrement loop counter */ + blkCnt--; + } + + *pResult = sum; +} +#else +void arm_power_q15( + const q15_t * pSrc, + uint32_t blockSize, + q63_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q63_t sum = 0; /* Temporary result storage */ + q15_t in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) + q31_t in32; /* Temporary variable to store packed input value */ +#endif + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ +#if defined (ARM_MATH_DSP) + in32 = read_q15x2_ia (&pSrc); + sum = __SMLALD(in32, in32, sum); + + in32 = read_q15x2_ia (&pSrc); + sum = __SMLALD(in32, in32, sum); +#else + in = *pSrc++; + sum += ((q31_t) in * in); + + in = *pSrc++; + sum += ((q31_t) in * in); + + in = *pSrc++; + sum += ((q31_t) in * in); + + in = *pSrc++; + sum += ((q31_t) in * in); +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q31_t) in * in); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result in 34.30 format */ + *pResult = sum; +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of power group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c new file mode 100644 index 0000000..a39b3a7 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q31.c @@ -0,0 +1,165 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_power_q31.c + * Description: Sum of the squares of the elements of a Q31 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup power + @{ + */ + +/** + @brief Sum of the squares of the elements of a Q31 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult sum of the squares value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using a 64-bit internal accumulator. + The input is represented in 1.31 format. + Intermediate multiplication yields a 2.62 format, and this + result is truncated to 2.48 format by discarding the lower 14 bits. + The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format. + With 15 guard bits in the accumulator, there is no risk of overflow, and the + full precision of the intermediate multiplication is preserved. + Finally, the return result is in 16.48 format. + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_power_q31( + const q31_t * pSrc, + uint32_t blockSize, + q63_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q31x4_t vecSrc; + q63_t sum = 0LL; + q31_t in; + + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + while (blkCnt > 0U) + { + vecSrc = vldrwq_s32(pSrc); + /* + * sum lanes + */ + sum = vrmlaldavhaq(sum, vecSrc, vecSrc); + + blkCnt --; + pSrc += 4; + } + + /* + * tail + */ + blkCnt = blockSize & 0x3; + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q63_t) in * in) >> 8; + + /* Decrement loop counter */ + blkCnt--; + } + + *pResult = asrl(sum, 6); +} +#else +void arm_power_q31( + const q31_t * pSrc, + uint32_t blockSize, + q63_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q63_t sum = 0; /* Temporary result storage */ + q31_t in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and store result in a temporary variable sum, providing 15 guard bits. */ + in = *pSrc++; + sum += ((q63_t) in * in) >> 14U; + + in = *pSrc++; + sum += ((q63_t) in * in) >> 14U; + + in = *pSrc++; + sum += ((q63_t) in * in) >> 14U; + + in = *pSrc++; + sum += ((q63_t) in * in) >> 14U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q63_t) in * in) >> 14U; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store results in 16.48 format */ + *pResult = sum; +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of power group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c new file mode 100644 index 0000000..0545f7c --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_power_q7.c @@ -0,0 +1,180 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_power_q7.c + * Description: Sum of the squares of the elements of a Q7 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup power + @{ + */ + +/** + @brief Sum of the squares of the elements of a Q7 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult sum of the squares value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using a 32-bit internal accumulator. + The input is represented in 1.7 format. + Intermediate multiplication yields a 2.14 format, and this + result is added without saturation to an accumulator in 18.14 format. + With 17 guard bits in the accumulator, there is no risk of overflow, and the + full precision of the intermediate multiplication is preserved. + Finally, the return result is in 18.14 format. + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_power_q7( + const q7_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q7x16_t vecSrc; + q31_t sum = 0LL; + q7_t in; + + /* Compute 16 outputs at a time */ + blkCnt = blockSize >> 4U; + while (blkCnt > 0U) + { + vecSrc = vldrbq_s8(pSrc); + /* + * sum lanes + */ + sum = vmladavaq(sum, vecSrc, vecSrc); + + blkCnt--; + pSrc += 16; + } + + /* + * tail + */ + blkCnt = blockSize & 0xF; + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q15_t) in * in); + + /* Decrement loop counter */ + blkCnt--; + } + + *pResult = sum; +} +#else +void arm_power_q7( + const q7_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q31_t sum = 0; /* Temporary result storage */ + q7_t in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) + q31_t in32; /* Temporary variable to store packed input value */ + q31_t in1, in2; /* Temporary variables to store input value */ +#endif + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ +#if defined (ARM_MATH_DSP) + in32 = read_q7x4_ia (&pSrc); + + in1 = __SXTB16(__ROR(in32, 8)); + in2 = __SXTB16(in32); + + /* calculate power and accumulate to accumulator */ + sum = __SMLAD(in1, in1, sum); + sum = __SMLAD(in2, in2, sum); +#else + in = *pSrc++; + sum += ((q15_t) in * in); + + in = *pSrc++; + sum += ((q15_t) in * in); + + in = *pSrc++; + sum += ((q15_t) in * in); + + in = *pSrc++; + sum += ((q15_t) in * in); +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute Power and store result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q15_t) in * in); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Store result in 18.14 format */ + *pResult = sum; +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of power group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c new file mode 100644 index 0000000..42a00ef --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f16.c @@ -0,0 +1,147 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_rms_f16.c + * Description: Root mean square value of the elements of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + + +/** + @ingroup groupStats + */ + +/** + @defgroup RMS Root mean square (RMS) + + Calculates the Root Mean Square of the elements in the input vector. + The underlying algorithm is used: + + <pre> + Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize)); + </pre> + + There are separate functions for floating point, Q31, and Q15 data types. + */ + +/** + @addtogroup RMS + @{ + */ + +/** + @brief Root Mean Square of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult root mean square value returned here + @return none + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_rms_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + float16_t pow = 0.0f; + + arm_power_f16(pSrc, blockSize, &pow); + + /* Compute Rms and store the result in the destination */ + arm_sqrt_f16((_Float16)pow / (_Float16) blockSize, pResult); +} +#else + +void arm_rms_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + _Float16 sum = 0.0f16; /* Temporary result storage */ + _Float16 in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + in = *pSrc++; + /* Compute sum of squares and store result in a temporary variable, sum. */ + sum += in * in; + + in = *pSrc++; + sum += in * in; + + in = *pSrc++; + sum += in * in; + + in = *pSrc++; + sum += in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + in = *pSrc++; + /* Compute sum of squares and store result in a temporary variable. */ + sum += ( in * in); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Compute Rms and store result in destination */ + arm_sqrt_f16((_Float16)sum / (_Float16) blockSize, pResult); +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of RMS group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c new file mode 100644 index 0000000..cb45752 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_f32.c @@ -0,0 +1,192 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_rms_f32.c + * Description: Root mean square value of the elements of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @defgroup RMS Root mean square (RMS) + + Calculates the Root Mean Square of the elements in the input vector. + The underlying algorithm is used: + + <pre> + Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize)); + </pre> + + There are separate functions for floating point, Q31, and Q15 data types. + */ + +/** + @addtogroup RMS + @{ + */ + +/** + @brief Root Mean Square of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult root mean square value returned here + @return none + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_rms_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t pow = 0.0f; + + arm_power_f32(pSrc, blockSize, &pow); + + /* Compute Rms and store the result in the destination */ + arm_sqrt_f32(pow / (float32_t) blockSize, pResult); +} +#else +#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_rms_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* accumulator */ + float32_t in; /* Temporary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + + float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */ + float32x2_t sumV2; + float32x4_t inV; + + blkCnt = blockSize >> 2U; + + /* Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + inV = vld1q_f32(pSrc); + sumV = vmlaq_f32(sumV, inV, inV); + pSrc += 4; + + /* Decrement the loop counter */ + blkCnt--; + } + + sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV)); + sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1); + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* compute power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Rms and store the result in the destination */ + arm_sqrt_f32(sum / (float32_t) blockSize, pResult); +} +#else +void arm_rms_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + float32_t sum = 0.0f; /* Temporary result storage */ + float32_t in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + in = *pSrc++; + /* Compute sum of squares and store result in a temporary variable, sum. */ + sum += in * in; + + in = *pSrc++; + sum += in * in; + + in = *pSrc++; + sum += in * in; + + in = *pSrc++; + sum += in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + in = *pSrc++; + /* Compute sum of squares and store result in a temporary variable. */ + sum += ( in * in); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Compute Rms and store result in destination */ + arm_sqrt_f32(sum / (float32_t) blockSize, pResult); +} +#endif /* #if defined(ARM_MATH_NEON) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of RMS group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c new file mode 100644 index 0000000..1df17b1 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q15.c @@ -0,0 +1,149 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_rms_q15.c + * Description: Root Mean Square of the elements of a Q15 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup RMS + @{ + */ + +/** + @brief Root Mean Square of the elements of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult root mean square value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using a 64-bit internal accumulator. + The input is represented in 1.15 format. + Intermediate multiplication yields a 2.30 format, and this + result is added without saturation to a 64-bit accumulator in 34.30 format. + With 33 guard bits in the accumulator, there is no risk of overflow, and the + full precision of the intermediate multiplication is preserved. + Finally, the 34.30 result is truncated to 34.15 format by discarding the lower + 15 bits, and then saturated to yield a result in 1.15 format. + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_rms_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q63_t pow = 0.0f; + q15_t normalizedPower; + + arm_power_q15(pSrc, blockSize, &pow); + + normalizedPower=__SSAT((pow / (q63_t) blockSize) >> 15,16); + arm_sqrt_q15(normalizedPower, pResult); +} +#else +void arm_rms_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q63_t sum = 0; /* Temporary result storage */ + q15_t in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) + q31_t in32; /* Temporary variable to store input value */ +#endif + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + /* Compute sum of squares and store result in a temporary variable. */ +#if defined (ARM_MATH_DSP) + in32 = read_q15x2_ia (&pSrc); + sum = __SMLALD(in32, in32, sum); + + in32 = read_q15x2_ia (&pSrc); + sum = __SMLALD(in32, in32, sum); +#else + in = *pSrc++; + sum += ((q31_t) in * in); + + in = *pSrc++; + sum += ((q31_t) in * in); + + in = *pSrc++; + sum += ((q31_t) in * in); + + in = *pSrc++; + sum += ((q31_t) in * in); +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + in = *pSrc++; + /* Compute sum of squares and store result in a temporary variable. */ + sum += ((q31_t) in * in); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Truncating and saturating the accumulator to 1.15 format */ + /* Store result in destination */ + arm_sqrt_q15(__SSAT((sum / (q63_t)blockSize) >> 15, 16), pResult); +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of RMS group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c new file mode 100644 index 0000000..ba39b7b --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_rms_q31.c @@ -0,0 +1,141 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_rms_q31.c + * Description: Root Mean Square of the elements of a Q31 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup RMS + @{ + */ + +/** + @brief Root Mean Square of the elements of a Q31 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult root mean square value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using an internal 64-bit accumulator. + The input is represented in 1.31 format, and intermediate multiplication + yields a 2.62 format. + The accumulator maintains full precision of the intermediate multiplication results, + but provides only a single guard bit. + There is no saturation on intermediate additions. + If the accumulator overflows, it wraps around and distorts the result. + In order to avoid overflows completely, the input signal must be scaled down by + log2(blockSize) bits, as a total of blockSize additions are performed internally. + Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value. + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_rms_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q63_t pow = 0.0f; + q31_t normalizedPower; + arm_power_q31(pSrc, blockSize, &pow); + + normalizedPower=clip_q63_to_q31((pow / (q63_t) blockSize) >> 17); + arm_sqrt_q31(normalizedPower, pResult); + +} + +#else +void arm_rms_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + uint64_t sum = 0; /* Temporary result storage (can get never negative. changed type from q63 to uint64 */ + q31_t in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + in = *pSrc++; + /* Compute sum of squares and store result in a temporary variable, sum. */ + sum += ((q63_t) in * in); + + in = *pSrc++; + sum += ((q63_t) in * in); + + in = *pSrc++; + sum += ((q63_t) in * in); + + in = *pSrc++; + sum += ((q63_t) in * in); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + + in = *pSrc++; + /* Compute sum of squares and store result in a temporary variable. */ + sum += ((q63_t) in * in); + + /* Decrement loop counter */ + blkCnt--; + } + + /* Convert data in 2.62 to 1.31 by 31 right shifts and saturate */ + /* Compute Rms and store result in destination vector */ + arm_sqrt_q31(clip_q63_to_q31((sum / (q63_t) blockSize) >> 31), pResult); +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of RMS group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f16.c new file mode 100644 index 0000000..951d165 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f16.c @@ -0,0 +1,67 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_std_f16.c + * Description: Standard deviation of the elements of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + + +/** + @ingroup groupStats + */ + + + +/** + @addtogroup STD + @{ + */ + +/** + @brief Standard deviation of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult standard deviation value returned here + @return none + */ +void arm_std_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + float16_t var; + arm_var_f16(pSrc,blockSize,&var); + arm_sqrt_f16(var, pResult); +} + +/** + @} end of STD group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c new file mode 100644 index 0000000..682443d --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f32.c @@ -0,0 +1,83 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_std_f32.c + * Description: Standard deviation of the elements of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @defgroup STD Standard deviation + + Calculates the standard deviation of the elements in the input vector. + + The float implementation is relying on arm_var_f32 which is using a two-pass algorithm + to avoid problem of numerical instabilities and cancellation errors. + + Fixed point versions are using the standard textbook algorithm since the fixed point + numerical behavior is different from the float one. + + Algorithm for fixed point versions is summarized below: + + + <pre> + Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1)) + + sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1] + sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1] + </pre> + + There are separate functions for floating point, Q31, and Q15 data types. + */ + +/** + @addtogroup STD + @{ + */ + +/** + @brief Standard deviation of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult standard deviation value returned here + @return none + */ +void arm_std_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t var; + arm_var_f32(pSrc,blockSize,&var); + arm_sqrt_f32(var, pResult); +} + +/** + @} end of STD group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f64.c new file mode 100644 index 0000000..620ec04 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_f64.c @@ -0,0 +1,59 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_std_f64.c + * Description: Standard deviation of the elements of a floating-point vector + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup STD + @{ + */ + +/** + @brief Standard deviation of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult standard deviation value returned here + @return none + */ +void arm_std_f64( + const float64_t * pSrc, + uint32_t blockSize, + float64_t * pResult) +{ + float64_t var; + arm_var_f64(pSrc,blockSize,&var); + *pResult = sqrt(var); +} + +/** + @} end of STD group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c new file mode 100644 index 0000000..88e273a --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q15.c @@ -0,0 +1,173 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_std_q15.c + * Description: Standard deviation of an array of Q15 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup STD + @{ + */ + +/** + @brief Standard deviation of the elements of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult standard deviation value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using a 64-bit internal accumulator. + The input is represented in 1.15 format. + Intermediate multiplication yields a 2.30 format, and this + result is added without saturation to a 64-bit accumulator in 34.30 format. + With 33 guard bits in the accumulator, there is no risk of overflow, and the + full precision of the intermediate multiplication is preserved. + Finally, the 34.30 result is truncated to 34.15 format by discarding the lower + 15 bits, and then saturated to yield a result in 1.15 format. + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_std_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q15_t var=0; + + arm_var_q15(pSrc, blockSize, &var); + arm_sqrt_q15(var,pResult); +} +#else +void arm_std_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q31_t sum = 0; /* Accumulator */ + q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */ + q63_t sumOfSquares = 0; /* Sum of squares */ + q15_t in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) + q31_t in32; /* Temporary variable to store input value */ +#endif + + if (blockSize <= 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + /* C = A[0] + A[1] + ... + A[blockSize-1] */ + + /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ + /* Compute sum and store result in a temporary variable, sum. */ +#if defined (ARM_MATH_DSP) + in32 = read_q15x2_ia (&pSrc); + sumOfSquares = __SMLALD(in32, in32, sumOfSquares); + sum += ((in32 << 16U) >> 16U); + sum += (in32 >> 16U); + + in32 = read_q15x2_ia (&pSrc); + sumOfSquares = __SMLALD(in32, in32, sumOfSquares); + sum += ((in32 << 16U) >> 16U); + sum += (in32 >> 16U); +#else + in = *pSrc++; + sumOfSquares += (in * in); + sum += in; + + in = *pSrc++; + sumOfSquares += (in * in); + sum += in; + + in = *pSrc++; + sumOfSquares += (in * in); + sum += in; + + in = *pSrc++; + sumOfSquares += (in * in); + sum += in; +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + /* C = A[0] + A[1] + ... + A[blockSize-1] */ + + in = *pSrc++; + /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ + sumOfSquares += (in * in); + /* Compute sum and store result in a temporary variable, sum. */ + sum += in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */ + meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U)); + + /* Compute square of mean */ + squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U))); + + /* mean of squares minus the square of mean. */ + /* Compute standard deviation and store result in destination */ + arm_sqrt_q15(__SSAT((meanOfSquares - squareOfMean) >> 15U, 16U), pResult); +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of STD group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c new file mode 100644 index 0000000..63170e7 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_std_q31.c @@ -0,0 +1,159 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_std_q31.c + * Description: Standard deviation of the elements of a Q31 vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup STD + @{ + */ + +/** + @brief Standard deviation of the elements of a Q31 vector. + @param[in] pSrc points to the input vector. + @param[in] blockSize number of samples in input vector. + @param[out] pResult standard deviation value returned here. + @return none + + @par Scaling and Overflow Behavior + The function is implemented using an internal 64-bit accumulator. + The input is represented in 1.31 format, which is then downshifted by 8 bits + which yields 1.23, and intermediate multiplication yields a 2.46 format. + The accumulator maintains full precision of the intermediate multiplication results, + but provides only a 16 guard bits. + There is no saturation on intermediate additions. + If the accumulator overflows it wraps around and distorts the result. + In order to avoid overflows completely the input signal must be scaled down by + log2(blockSize)-8 bits, as a total of blockSize additions are performed internally. + After division, internal variables should be Q18.46 + Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value. + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_std_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q31_t var=0; + + arm_var_q31(pSrc, blockSize, &var); + arm_sqrt_q31(var, pResult); +} +#else +void arm_std_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q63_t sum = 0; /* Accumulator */ + q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */ + q63_t sumOfSquares = 0; /* Sum of squares */ + q31_t in; /* Temporary variable to store input value */ + + if (blockSize <= 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + /* C = A[0] + A[1] + ... + A[blockSize-1] */ + + in = *pSrc++ >> 8U; + /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ + sumOfSquares += ((q63_t) (in) * (in)); + /* Compute sum and store result in a temporary variable, sum. */ + sum += in; + + in = *pSrc++ >> 8U; + sumOfSquares += ((q63_t) (in) * (in)); + sum += in; + + in = *pSrc++ >> 8U; + sumOfSquares += ((q63_t) (in) * (in)); + sum += in; + + in = *pSrc++ >> 8U; + sumOfSquares += ((q63_t) (in) * (in)); + sum += in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + /* C = A[0] + A[1] + ... + A[blockSize-1] */ + + in = *pSrc++ >> 8U; + /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ + sumOfSquares += ((q63_t) (in) * (in)); + /* Compute sum and store result in a temporary variable, sum. */ + sum += in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */ + meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U)); + + /* Compute square of mean */ + squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U))); + + /* Compute standard deviation and store result in destination */ + arm_sqrt_q31((meanOfSquares - squareOfMean) >> 15U, pResult); +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of STD group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c new file mode 100644 index 0000000..0e3ade5 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f16.c @@ -0,0 +1,209 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_var_f16.c + * Description: Variance of the elements of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions_f16.h" + + +#if defined(ARM_FLOAT16_SUPPORTED) + + +/** + @ingroup groupStats + */ + + +/** + @addtogroup variance + @{ + */ + +/** + @brief Variance of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult variance value returned here + @return none + */ +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + + +void arm_var_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + int32_t blkCnt; /* loop counters */ + f16x8_t vecSrc; + f16x8_t sumVec = vdupq_n_f16(0.0f16); + float16_t fMean; + + if (blockSize <= 1U) { + *pResult = 0; + return; + } + + + arm_mean_f16(pSrc, blockSize, &fMean); + + blkCnt = blockSize; + do { + mve_pred16_t p = vctp16q(blkCnt); + + vecSrc = vldrhq_z_f16((float16_t const *) pSrc, p); + /* + * sum lanes + */ + vecSrc = vsubq_m(vuninitializedq_f16(), vecSrc, fMean, p); + sumVec = vfmaq_m(sumVec, vecSrc, vecSrc, p); + + blkCnt -= 8; + pSrc += 8; + } + while (blkCnt > 0); + + /* Variance */ + *pResult = (_Float16)vecAddAcrossF16Mve(sumVec) / (_Float16) (blockSize - 1.0f16); +} +#else + +void arm_var_f16( + const float16_t * pSrc, + uint32_t blockSize, + float16_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + _Float16 sum = 0.0f; /* Temporary result storage */ + _Float16 fSum = 0.0f; + _Float16 fMean, fValue; + const float16_t * pInput = pSrc; + + if (blockSize <= 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + + sum += (_Float16)*pInput++; + sum += (_Float16)*pInput++; + sum += (_Float16)*pInput++; + sum += (_Float16)*pInput++; + + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + + sum += (_Float16)*pInput++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + fMean = (_Float16)sum / (_Float16) blockSize; + + pInput = pSrc; + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + fValue = (_Float16)*pInput++ - (_Float16)fMean; + fSum += (_Float16)fValue * (_Float16)fValue; + + fValue = (_Float16)*pInput++ - (_Float16)fMean; + fSum += (_Float16)fValue * (_Float16)fValue; + + fValue = (_Float16)*pInput++ - (_Float16)fMean; + fSum += (_Float16)fValue * (_Float16)fValue; + + fValue = (_Float16)*pInput++ - (_Float16)fMean; + fSum += (_Float16)fValue * (_Float16)fValue; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + fValue = (_Float16)*pInput++ - (_Float16)fMean; + fSum += (_Float16)fValue * (_Float16)fValue; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Variance */ + *pResult = (_Float16)fSum / ((_Float16)blockSize - 1.0f16); +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of variance group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c new file mode 100644 index 0000000..7ff344c --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f32.c @@ -0,0 +1,293 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_var_f32.c + * Description: Variance of the elements of a floating-point vector + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @defgroup variance Variance + + Calculates the variance of the elements in the input vector. + The underlying algorithm used is the direct method sometimes referred to as the two-pass method: + + <pre> + Result = sum(element - meanOfElements)^2) / numElement - 1 + + meanOfElements = ( pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize + </pre> + + There are separate functions for floating point, Q31, and Q15 data types. + */ + +/** + @addtogroup variance + @{ + */ + +/** + @brief Variance of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult variance value returned here + @return none + */ +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +void arm_var_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + f32x4_t vecSrc; + f32x4_t sumVec = vdupq_n_f32(0.0f); + float32_t fMean; + float32_t sum = 0.0f; /* accumulator */ + float32_t in; /* Temporary variable to store input value */ + + if (blockSize <= 1U) { + *pResult = 0; + return; + } + + arm_mean_f32(pSrc, blockSize, &fMean); + + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + while (blkCnt > 0U) + { + + vecSrc = vldrwq_f32(pSrc); + /* + * sum lanes + */ + vecSrc = vsubq(vecSrc, fMean); + sumVec = vfmaq(sumVec, vecSrc, vecSrc); + + blkCnt --; + pSrc += 4; + } + + sum = vecAddAcrossF32Mve(sumVec); + + /* + * tail + */ + blkCnt = blockSize & 0x3; + while (blkCnt > 0U) + { + in = *pSrc++ - fMean; + sum += in * in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Variance */ + *pResult = sum / (float32_t) (blockSize - 1); +} +#else +#if defined(ARM_MATH_NEON_EXPERIMENTAL) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_var_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t mean; + + float32_t sum = 0.0f; /* accumulator */ + float32_t in; /* Temporary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + + float32x4_t sumV = vdupq_n_f32(0.0f); /* Temporary result storage */ + float32x2_t sumV2; + float32x4_t inV; + float32x4_t avg; + + arm_mean_f32(pSrc,blockSize,&mean); + avg = vdupq_n_f32(mean); + + blkCnt = blockSize >> 2U; + + /* Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + inV = vld1q_f32(pSrc); + inV = vsubq_f32(inV, avg); + sumV = vmlaq_f32(sumV, inV, inV); + pSrc += 4; + + /* Decrement the loop counter */ + blkCnt--; + } + + sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV)); + sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1); + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* compute power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + in = in - mean; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Variance */ + *pResult = sum / (float32_t)(blockSize - 1.0f); + +} + +#else +void arm_var_f32( + const float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + float32_t sum = 0.0f; /* Temporary result storage */ + float32_t fSum = 0.0f; + float32_t fMean, fValue; + const float32_t * pInput = pSrc; + + if (blockSize <= 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + + sum += *pInput++; + sum += *pInput++; + sum += *pInput++; + sum += *pInput++; + + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + + sum += *pInput++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + fMean = sum / (float32_t) blockSize; + + pInput = pSrc; + +#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Variance */ + *pResult = fSum / (float32_t)(blockSize - 1.0f); +} +#endif /* #if defined(ARM_MATH_NEON) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + @} end of variance group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f64.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f64.c new file mode 100644 index 0000000..6fd106a --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_f64.c @@ -0,0 +1,100 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_var_f64.c + * Description: Variance of the elements of a floating-point vector + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup variance + @{ + */ + +/** + @brief Variance of the elements of a floating-point vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult variance value returned here + @return none + */ +void arm_var_f64( + const float64_t * pSrc, + uint32_t blockSize, + float64_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + float64_t sum = 0.; /* Temporary result storage */ + float64_t fSum = 0.; + float64_t fMean, fValue; + const float64_t * pInput = pSrc; + + if (blockSize <= 1U) + { + *pResult = 0; + return; + } + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + + sum += *pInput++; + + /* Decrement loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + fMean = sum / (float64_t) blockSize; + + pInput = pSrc; + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Variance */ + *pResult = fSum / (float64_t)(blockSize - 1.); +} + +/** + @} end of variance group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c new file mode 100644 index 0000000..f020c88 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q15.c @@ -0,0 +1,230 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_var_q15.c + * Description: Variance of an array of Q15 type + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup variance + @{ + */ + +/** + @brief Variance of the elements of a Q15 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult variance value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using a 64-bit internal accumulator. + The input is represented in 1.15 format. + Intermediate multiplication yields a 2.30 format, and this + result is added without saturation to a 64-bit accumulator in 34.30 format. + With 33 guard bits in the accumulator, there is no risk of overflow, and the + full precision of the intermediate multiplication is preserved. + Finally, the 34.30 result is truncated to 34.15 format by discarding the lower + 15 bits, and then saturated to yield a result in 1.15 format. + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_var_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q15x8_t vecSrc; + q63_t sumOfSquares = 0LL; + q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ + q63_t sum = 0LL; + q15_t in; + + if (blockSize <= 1U) { + *pResult = 0; + return; + } + + + blkCnt = blockSize >> 3; + while (blkCnt > 0U) + { + vecSrc = vldrhq_s16(pSrc); + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + + sumOfSquares = vmlaldavaq_s16(sumOfSquares, vecSrc, vecSrc); + sum = vaddvaq_s16(sum, vecSrc); + + blkCnt --; + pSrc += 8; + } + + /* Tail */ + blkCnt = blockSize & 7; + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + /* C = A[0] + A[1] + ... + A[blockSize-1] */ + + in = *pSrc++; + /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ +#if defined (ARM_MATH_DSP) + sumOfSquares = __SMLALD(in, in, sumOfSquares); +#else + sumOfSquares += (in * in); +#endif /* #if defined (ARM_MATH_DSP) */ + /* Compute sum and store result in a temporary variable, sum. */ + sum += in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = arm_div_q63_to_q31(sumOfSquares, (blockSize - 1U)); + + /* Compute square of mean */ + squareOfMean = arm_div_q63_to_q31((q63_t)sum * sum, (q31_t)(blockSize * (blockSize - 1U))); + + /* mean of the squares minus the square of the mean. */ + *pResult = (meanOfSquares - squareOfMean) >> 15; +} +#else +void arm_var_q15( + const q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q31_t sum = 0; /* Accumulator */ + q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */ + q63_t sumOfSquares = 0; /* Sum of squares */ + q15_t in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP) + q31_t in32; /* Temporary variable to store input value */ +#endif + + if (blockSize <= 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + /* C = A[0] + A[1] + ... + A[blockSize-1] */ + + /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ + /* Compute sum and store result in a temporary variable, sum. */ +#if defined (ARM_MATH_DSP) + in32 = read_q15x2_ia (&pSrc); + sumOfSquares = __SMLALD(in32, in32, sumOfSquares); + sum += ((in32 << 16U) >> 16U); + sum += (in32 >> 16U); + + in32 = read_q15x2_ia (&pSrc); + sumOfSquares = __SMLALD(in32, in32, sumOfSquares); + sum += ((in32 << 16U) >> 16U); + sum += (in32 >> 16U); +#else + in = *pSrc++; + sumOfSquares += (in * in); + sum += in; + + in = *pSrc++; + sumOfSquares += (in * in); + sum += in; + + in = *pSrc++; + sumOfSquares += (in * in); + sum += in; + + in = *pSrc++; + sumOfSquares += (in * in); + sum += in; +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + /* C = A[0] + A[1] + ... + A[blockSize-1] */ + + in = *pSrc++; + /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ +#if defined (ARM_MATH_DSP) + sumOfSquares = __SMLALD(in, in, sumOfSquares); +#else + sumOfSquares += (in * in); +#endif /* #if defined (ARM_MATH_DSP) */ + /* Compute sum and store result in a temporary variable, sum. */ + sum += in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */ + meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U)); + + /* Compute square of mean */ + squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U))); + + /* mean of squares minus the square of mean. */ + *pResult = (meanOfSquares - squareOfMean) >> 15U; +} +#endif /* defined(ARM_MATH_MVEI) */ + +/** + @} end of variance group + */ diff --git a/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c new file mode 100644 index 0000000..45b6b66 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/StatisticsFunctions/arm_var_q31.c @@ -0,0 +1,214 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_var_q31.c + * Description: Variance of an array of Q31 type + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/statistics_functions.h" + +/** + @ingroup groupStats + */ + +/** + @addtogroup variance + @{ + */ + +/** + @brief Variance of the elements of a Q31 vector. + @param[in] pSrc points to the input vector + @param[in] blockSize number of samples in input vector + @param[out] pResult variance value returned here + @return none + + @par Scaling and Overflow Behavior + The function is implemented using an internal 64-bit accumulator. + The input is represented in 1.31 format, which is then downshifted by 8 bits + which yields 1.23, and intermediate multiplication yields a 2.46 format. + The accumulator maintains full precision of the intermediate multiplication results, + and as a consequence has only 16 guard bits. + There is no saturation on intermediate additions. + If the accumulator overflows it wraps around and distorts the result. + In order to avoid overflows completely the input signal must be scaled down by + log2(blockSize)-8 bits, as a total of blockSize additions are performed internally. + After division, internal variables should be Q18.46 + Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value. + */ +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_var_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + uint32_t blkCnt; /* loop counters */ + q31x4_t vecSrc; + q63_t sumOfSquares = 0LL; + q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ + q63_t sum = 0LL; + q31_t in; + + if (blockSize <= 1U) { + *pResult = 0; + return; + } + + + /* Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + while (blkCnt > 0U) + { + vecSrc = vldrwq_s32(pSrc); + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + + /* downscale */ + vecSrc = vshrq(vecSrc, 8); + sumOfSquares = vmlaldavaq(sumOfSquares, vecSrc, vecSrc); + sum = vaddlvaq(sum, vecSrc); + + blkCnt --; + pSrc += 4; + } + + + /* + * tail + */ + blkCnt = blockSize & 0x3; + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + /* C = A[0] + A[1] + ... + A[blockSize-1] */ + + in = *pSrc++ >> 8U; + /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ + sumOfSquares += ((q63_t) (in) * (in)); + /* Compute sum and store result in a temporary variable, sum. */ + sum += in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = sumOfSquares / (q63_t) (blockSize - 1U); + + /* Compute square of mean */ + squareOfMean = sum * sum / (q63_t) (blockSize * (blockSize - 1U)); + + /* Compute standard deviation and then store the result to the destination */ + *pResult = asrl(meanOfSquares - squareOfMean, 15U); +} +#else +void arm_var_q31( + const q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + uint32_t blkCnt; /* Loop counter */ + q63_t sum = 0; /* Temporary result storage */ + q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */ + q63_t sumOfSquares = 0; /* Sum of squares */ + q31_t in; /* Temporary variable to store input value */ + + if (blockSize <= 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + blkCnt = blockSize >> 2U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + /* C = A[0] + A[1] + ... + A[blockSize-1] */ + + in = *pSrc++ >> 8U; + /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ + sumOfSquares += ((q63_t) (in) * (in)); + /* Compute sum and store result in a temporary variable, sum. */ + sum += in; + + in = *pSrc++ >> 8U; + sumOfSquares += ((q63_t) (in) * (in)); + sum += in; + + in = *pSrc++ >> 8U; + sumOfSquares += ((q63_t) (in) * (in)); + sum += in; + + in = *pSrc++ >> 8U; + sumOfSquares += ((q63_t) (in) * (in)); + sum += in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Loop unrolling: Compute remaining outputs */ + blkCnt = blockSize % 0x4U; + +#else + + /* Initialize blkCnt with number of samples */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */ + /* C = A[0] + A[1] + ... + A[blockSize-1] */ + + in = *pSrc++ >> 8U; + /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */ + sumOfSquares += ((q63_t) (in) * (in)); + /* Compute sum and store result in a temporary variable, sum. */ + sum += in; + + /* Decrement loop counter */ + blkCnt--; + } + + /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */ + meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U)); + + /* Compute square of mean */ + squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U))); + + /* Compute variance and store result in destination */ + *pResult = (meanOfSquares - squareOfMean) >> 15U; +} +#endif +/** + @} end of variance group + */ |