diff options
Diffstat (limited to 'Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c')
-rw-r--r-- | Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c | 274 |
1 files changed, 274 insertions, 0 deletions
diff --git a/Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c b/Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c new file mode 100644 index 0000000..6dfe55c --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c @@ -0,0 +1,274 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_barycenter_f16.c + * Description: Barycenter + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/support_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include <limits.h> +#include <math.h> + +/** + @ingroup groupSupport + */ + +/** + @defgroup barycenter Barycenter + + Barycenter of weighted vectors + */ + +/** + @addtogroup barycenter + @{ + */ + + +/** + * @brief Barycenter + * + * + * @param[in] *in List of vectors + * @param[in] *weights Weights of the vectors + * @param[out] *out Barycenter + * @param[in] nbVectors Number of vectors + * @param[in] vecDim Dimension of space (vector dimension) + * @return None + * + */ + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +void arm_barycenter_f16(const float16_t *in, + const float16_t *weights, + float16_t *out, + uint32_t nbVectors, + uint32_t vecDim) +{ + const float16_t *pIn, *pW; + const float16_t *pIn1, *pIn2, *pIn3, *pIn4; + float16_t *pOut; + uint32_t blkCntVector, blkCntSample; + float16_t accum, w; + + blkCntVector = nbVectors; + blkCntSample = vecDim; + + accum = 0.0f; + + pW = weights; + pIn = in; + + + arm_fill_f16(0.0f, out, vecDim); + + + /* Sum */ + pIn1 = pIn; + pIn2 = pIn1 + vecDim; + pIn3 = pIn2 + vecDim; + pIn4 = pIn3 + vecDim; + + blkCntVector = nbVectors >> 2; + while (blkCntVector > 0) + { + f16x8_t outV, inV1, inV2, inV3, inV4; + float16_t w1, w2, w3, w4; + + pOut = out; + w1 = *pW++; + w2 = *pW++; + w3 = *pW++; + w4 = *pW++; + accum += (_Float16)w1 + (_Float16)w2 + (_Float16)w3 + (_Float16)w4; + + blkCntSample = vecDim >> 3; + while (blkCntSample > 0) { + outV = vld1q((const float16_t *) pOut); + inV1 = vld1q(pIn1); + inV2 = vld1q(pIn2); + inV3 = vld1q(pIn3); + inV4 = vld1q(pIn4); + outV = vfmaq(outV, inV1, w1); + outV = vfmaq(outV, inV2, w2); + outV = vfmaq(outV, inV3, w3); + outV = vfmaq(outV, inV4, w4); + vst1q(pOut, outV); + + pOut += 8; + pIn1 += 8; + pIn2 += 8; + pIn3 += 8; + pIn4 += 8; + + blkCntSample--; + } + + blkCntSample = vecDim & 7; + while (blkCntSample > 0) { + *pOut = (_Float16)*pOut + (_Float16)*pIn1++ * (_Float16)w1; + *pOut = (_Float16)*pOut + (_Float16)*pIn2++ * (_Float16)w2; + *pOut = (_Float16)*pOut + (_Float16)*pIn3++ * (_Float16)w3; + *pOut = (_Float16)*pOut + (_Float16)*pIn4++ * (_Float16)w4; + pOut++; + blkCntSample--; + } + + pIn1 += 3 * vecDim; + pIn2 += 3 * vecDim; + pIn3 += 3 * vecDim; + pIn4 += 3 * vecDim; + + blkCntVector--; + } + + pIn = pIn1; + + blkCntVector = nbVectors & 3; + while (blkCntVector > 0) + { + f16x8_t inV, outV; + + pOut = out; + w = *pW++; + accum += (_Float16)w; + + blkCntSample = vecDim >> 3; + while (blkCntSample > 0) + { + outV = vld1q_f16(pOut); + inV = vld1q_f16(pIn); + outV = vfmaq(outV, inV, w); + vst1q_f16(pOut, outV); + pOut += 8; + pIn += 8; + + blkCntSample--; + } + + blkCntSample = vecDim & 7; + while (blkCntSample > 0) + { + *pOut = (_Float16)*pOut + (_Float16)*pIn++ * (_Float16)w; + pOut++; + blkCntSample--; + } + + blkCntVector--; + } + + /* Normalize */ + pOut = out; + accum = 1.0f16 / (_Float16)accum; + + blkCntSample = vecDim >> 3; + while (blkCntSample > 0) + { + f16x8_t tmp; + + tmp = vld1q((const float16_t *) pOut); + tmp = vmulq(tmp, accum); + vst1q(pOut, tmp); + pOut += 8; + blkCntSample--; + } + + blkCntSample = vecDim & 7; + while (blkCntSample > 0) + { + *pOut = (_Float16)*pOut * (_Float16)accum; + pOut++; + blkCntSample--; + } +} +#else +void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t *out, uint32_t nbVectors,uint32_t vecDim) +{ + + const float16_t *pIn,*pW; + float16_t *pOut; + uint32_t blkCntVector,blkCntSample; + float16_t accum, w; + + blkCntVector = nbVectors; + blkCntSample = vecDim; + + accum = 0.0f16; + + pW = weights; + pIn = in; + + /* Set counters to 0 */ + blkCntSample = vecDim; + pOut = out; + + while(blkCntSample > 0) + { + *pOut = 0.0f16; + pOut++; + blkCntSample--; + } + + /* Sum */ + while(blkCntVector > 0) + { + pOut = out; + w = *pW++; + accum += (_Float16)w; + + blkCntSample = vecDim; + while(blkCntSample > 0) + { + *pOut = (_Float16)*pOut + (_Float16)*pIn++ * (_Float16)w; + pOut++; + blkCntSample--; + } + + blkCntVector--; + } + + /* Normalize */ + blkCntSample = vecDim; + pOut = out; + + while(blkCntSample > 0) + { + *pOut = (_Float16)*pOut / (_Float16)accum; + pOut++; + blkCntSample--; + } + +} +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of barycenter group + */ + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + |