diff options
Diffstat (limited to 'Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c')
-rw-r--r-- | Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c | 414 |
1 files changed, 414 insertions, 0 deletions
diff --git a/Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c b/Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c new file mode 100644 index 0000000..817bb58 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c @@ -0,0 +1,414 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_barycenter_f32.c + * Description: Barycenter + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/support_functions.h" +#include <limits.h> +#include <math.h> + + +/** + @ingroup barycenter + */ + + +/** + * @brief Barycenter + * + * + * @param[in] *in List of vectors + * @param[in] *weights Weights of the vectors + * @param[out] *out Barycenter + * @param[in] nbVectors Number of vectors + * @param[in] vecDim Dimension of space (vector dimension) + * @return None + * + */ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) +void arm_barycenter_f32(const float32_t *in, + const float32_t *weights, + float32_t *out, + uint32_t nbVectors, + uint32_t vecDim) +{ + const float32_t *pIn, *pW; + const float32_t *pIn1, *pIn2, *pIn3, *pIn4; + float32_t *pOut; + uint32_t blkCntVector, blkCntSample; + float32_t accum, w; + + blkCntVector = nbVectors; + blkCntSample = vecDim; + + accum = 0.0f; + + pW = weights; + pIn = in; + + + arm_fill_f32(0.0f, out, vecDim); + + + /* Sum */ + pIn1 = pIn; + pIn2 = pIn1 + vecDim; + pIn3 = pIn2 + vecDim; + pIn4 = pIn3 + vecDim; + + blkCntVector = nbVectors >> 2; + while (blkCntVector > 0) + { + f32x4_t outV, inV1, inV2, inV3, inV4; + float32_t w1, w2, w3, w4; + + pOut = out; + w1 = *pW++; + w2 = *pW++; + w3 = *pW++; + w4 = *pW++; + accum += w1 + w2 + w3 + w4; + + blkCntSample = vecDim >> 2; + while (blkCntSample > 0) { + outV = vld1q((const float32_t *) pOut); + inV1 = vld1q(pIn1); + inV2 = vld1q(pIn2); + inV3 = vld1q(pIn3); + inV4 = vld1q(pIn4); + outV = vfmaq(outV, inV1, w1); + outV = vfmaq(outV, inV2, w2); + outV = vfmaq(outV, inV3, w3); + outV = vfmaq(outV, inV4, w4); + vst1q(pOut, outV); + + pOut += 4; + pIn1 += 4; + pIn2 += 4; + pIn3 += 4; + pIn4 += 4; + + blkCntSample--; + } + + blkCntSample = vecDim & 3; + while (blkCntSample > 0) { + *pOut = *pOut + *pIn1++ * w1; + *pOut = *pOut + *pIn2++ * w2; + *pOut = *pOut + *pIn3++ * w3; + *pOut = *pOut + *pIn4++ * w4; + pOut++; + blkCntSample--; + } + + pIn1 += 3 * vecDim; + pIn2 += 3 * vecDim; + pIn3 += 3 * vecDim; + pIn4 += 3 * vecDim; + + blkCntVector--; + } + + pIn = pIn1; + + blkCntVector = nbVectors & 3; + while (blkCntVector > 0) + { + f32x4_t inV, outV; + + pOut = out; + w = *pW++; + accum += w; + + blkCntSample = vecDim >> 2; + while (blkCntSample > 0) + { + outV = vld1q_f32(pOut); + inV = vld1q_f32(pIn); + outV = vfmaq(outV, inV, w); + vst1q_f32(pOut, outV); + pOut += 4; + pIn += 4; + + blkCntSample--; + } + + blkCntSample = vecDim & 3; + while (blkCntSample > 0) + { + *pOut = *pOut + *pIn++ * w; + pOut++; + blkCntSample--; + } + + blkCntVector--; + } + + /* Normalize */ + pOut = out; + accum = 1.0f / accum; + + blkCntSample = vecDim >> 2; + while (blkCntSample > 0) + { + f32x4_t tmp; + + tmp = vld1q((const float32_t *) pOut); + tmp = vmulq(tmp, accum); + vst1q(pOut, tmp); + pOut += 4; + blkCntSample--; + } + + blkCntSample = vecDim & 3; + while (blkCntSample > 0) + { + *pOut = *pOut * accum; + pOut++; + blkCntSample--; + } +} +#else +#if defined(ARM_MATH_NEON) + +#include "NEMath.h" +void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out, uint32_t nbVectors,uint32_t vecDim) +{ + + const float32_t *pIn,*pW, *pIn1, *pIn2, *pIn3, *pIn4; + float32_t *pOut; + uint32_t blkCntVector,blkCntSample; + float32_t accum, w,w1,w2,w3,w4; + + float32x4_t tmp, inV,outV, inV1, inV2, inV3, inV4; + + blkCntVector = nbVectors; + blkCntSample = vecDim; + + accum = 0.0f; + + pW = weights; + pIn = in; + + /* Set counters to 0 */ + tmp = vdupq_n_f32(0.0f); + pOut = out; + + blkCntSample = vecDim >> 2; + while(blkCntSample > 0) + { + vst1q_f32(pOut, tmp); + pOut += 4; + blkCntSample--; + } + + blkCntSample = vecDim & 3; + while(blkCntSample > 0) + { + *pOut = 0.0f; + pOut++; + blkCntSample--; + } + + /* Sum */ + + pIn1 = pIn; + pIn2 = pIn1 + vecDim; + pIn3 = pIn2 + vecDim; + pIn4 = pIn3 + vecDim; + + blkCntVector = nbVectors >> 2; + while(blkCntVector > 0) + { + pOut = out; + w1 = *pW++; + w2 = *pW++; + w3 = *pW++; + w4 = *pW++; + accum += w1 + w2 + w3 + w4; + + blkCntSample = vecDim >> 2; + while(blkCntSample > 0) + { + outV = vld1q_f32(pOut); + inV1 = vld1q_f32(pIn1); + inV2 = vld1q_f32(pIn2); + inV3 = vld1q_f32(pIn3); + inV4 = vld1q_f32(pIn4); + outV = vmlaq_n_f32(outV,inV1,w1); + outV = vmlaq_n_f32(outV,inV2,w2); + outV = vmlaq_n_f32(outV,inV3,w3); + outV = vmlaq_n_f32(outV,inV4,w4); + vst1q_f32(pOut, outV); + pOut += 4; + pIn1 += 4; + pIn2 += 4; + pIn3 += 4; + pIn4 += 4; + + blkCntSample--; + } + + blkCntSample = vecDim & 3; + while(blkCntSample > 0) + { + *pOut = *pOut + *pIn1++ * w1; + *pOut = *pOut + *pIn2++ * w2; + *pOut = *pOut + *pIn3++ * w3; + *pOut = *pOut + *pIn4++ * w4; + pOut++; + blkCntSample--; + } + + pIn1 += 3*vecDim; + pIn2 += 3*vecDim; + pIn3 += 3*vecDim; + pIn4 += 3*vecDim; + + blkCntVector--; + } + + pIn = pIn1; + + blkCntVector = nbVectors & 3; + while(blkCntVector > 0) + { + pOut = out; + w = *pW++; + accum += w; + + blkCntSample = vecDim >> 2; + while(blkCntSample > 0) + { + outV = vld1q_f32(pOut); + inV = vld1q_f32(pIn); + outV = vmlaq_n_f32(outV,inV,w); + vst1q_f32(pOut, outV); + pOut += 4; + pIn += 4; + + blkCntSample--; + } + + blkCntSample = vecDim & 3; + while(blkCntSample > 0) + { + *pOut = *pOut + *pIn++ * w; + pOut++; + blkCntSample--; + } + + blkCntVector--; + } + + /* Normalize */ + pOut = out; + accum = 1.0f / accum; + + blkCntSample = vecDim >> 2; + while(blkCntSample > 0) + { + tmp = vld1q_f32(pOut); + tmp = vmulq_n_f32(tmp,accum); + vst1q_f32(pOut, tmp); + pOut += 4; + blkCntSample--; + } + + blkCntSample = vecDim & 3; + while(blkCntSample > 0) + { + *pOut = *pOut * accum; + pOut++; + blkCntSample--; + } + +} +#else +void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out, uint32_t nbVectors,uint32_t vecDim) +{ + + const float32_t *pIn,*pW; + float32_t *pOut; + uint32_t blkCntVector,blkCntSample; + float32_t accum, w; + + blkCntVector = nbVectors; + blkCntSample = vecDim; + + accum = 0.0f; + + pW = weights; + pIn = in; + + /* Set counters to 0 */ + blkCntSample = vecDim; + pOut = out; + + while(blkCntSample > 0) + { + *pOut = 0.0f; + pOut++; + blkCntSample--; + } + + /* Sum */ + while(blkCntVector > 0) + { + pOut = out; + w = *pW++; + accum += w; + + blkCntSample = vecDim; + while(blkCntSample > 0) + { + *pOut = *pOut + *pIn++ * w; + pOut++; + blkCntSample--; + } + + blkCntVector--; + } + + /* Normalize */ + blkCntSample = vecDim; + pOut = out; + + while(blkCntSample > 0) + { + *pOut = *pOut / accum; + pOut++; + blkCntSample--; + } + +} +#endif +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of barycenter group + */ |