diff options
author | Clyne Sullivan <clyne@bitgloo.com> | 2025-01-29 21:34:25 -0500 |
---|---|---|
committer | Clyne Sullivan <clyne@bitgloo.com> | 2025-01-29 21:34:25 -0500 |
commit | 5b81bc8ccbd342b8566d88fc9f17a73aec03b5b6 (patch) | |
tree | cc57486912cfa74c6440d8b97c28f451ec787d78 /Drivers/CMSIS/DSP/Source/FastMathFunctions |
initial commit
Diffstat (limited to 'Drivers/CMSIS/DSP/Source/FastMathFunctions')
26 files changed, 3212 insertions, 0 deletions
diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/CMakeLists.txt b/Drivers/CMSIS/DSP/Source/FastMathFunctions/CMakeLists.txt new file mode 100644 index 0000000..95a9eb6 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/CMakeLists.txt @@ -0,0 +1,80 @@ +cmake_minimum_required (VERSION 3.14) + +project(CMSISDSPFastMath) + +include(configLib) +include(configDsp) + + +add_library(CMSISDSPFastMath STATIC) +configLib(CMSISDSPFastMath ${ROOT}) +configDsp(CMSISDSPFastMath ${ROOT}) + +include(interpol) +interpol(CMSISDSPFastMath) + +if (CONFIGTABLE AND ALLFAST) + target_compile_definitions(CMSISDSPFastMath PUBLIC ARM_ALL_FAST_TABLES) +endif() + +if (NOT CONFIGTABLE OR ALLFAST OR ARM_COS_F32) +target_sources(CMSISDSPFastMath PRIVATE arm_cos_f32.c) +endif() + +if (NOT CONFIGTABLE OR ALLFAST OR ARM_COS_Q15) +target_sources(CMSISDSPFastMath PRIVATE arm_cos_q15.c) +endif() + +if (NOT CONFIGTABLE OR ALLFAST OR ARM_COS_Q31) +target_sources(CMSISDSPFastMath PRIVATE arm_cos_q31.c) +endif() + +if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_F32) +target_sources(CMSISDSPFastMath PRIVATE arm_sin_f32.c) +endif() + +if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_Q15) +target_sources(CMSISDSPFastMath PRIVATE arm_sin_q15.c) +endif() + +if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_Q31) +target_sources(CMSISDSPFastMath PRIVATE arm_sin_q31.c) +endif() + +if (NOT CONFIGTABLE OR ALLFAST OR ARM_SQRT_Q31) +target_sources(CMSISDSPFastMath PRIVATE arm_sqrt_q31.c) +endif() + +if (NOT CONFIGTABLE OR ALLFAST OR ARM_SQRT_Q15) +target_sources(CMSISDSPFastMath PRIVATE arm_sqrt_q15.c) +endif() + +target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f32.c) +target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f64.c) +target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f32.c) +target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f64.c) + +target_sources(CMSISDSPFastMath PRIVATE arm_vlog_q31.c) +target_sources(CMSISDSPFastMath PRIVATE arm_vlog_q15.c) + + +if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16)) +target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f16.c) +target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f16.c) +target_sources(CMSISDSPFastMath PRIVATE arm_vinverse_f16.c) +target_sources(CMSISDSPFastMath PRIVATE arm_atan2_f16.c) +endif() + +target_sources(CMSISDSPFastMath PRIVATE arm_divide_q15.c) +target_sources(CMSISDSPFastMath PRIVATE arm_divide_q31.c) + +target_sources(CMSISDSPFastMath PRIVATE arm_atan2_f32.c) +target_sources(CMSISDSPFastMath PRIVATE arm_atan2_q31.c) +target_sources(CMSISDSPFastMath PRIVATE arm_atan2_q15.c) + + +### Includes +target_include_directories(CMSISDSPFastMath PUBLIC "${DSP}/Include") + + + diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctions.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctions.c new file mode 100644 index 0000000..0146d61 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctions.c @@ -0,0 +1,75 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: FastMathFunctions.c + * Description: Combination of all fast math function source files. + * + * $Date: 16. March 2020 + * $Revision: V1.1.0 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_ALLOW_TABLES) + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_F32) +#include "arm_cos_f32.c" +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q15) +#include "arm_cos_q15.c" +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q31) +#include "arm_cos_q31.c" +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_F32) +#include "arm_sin_f32.c" +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q15) +#include "arm_sin_q15.c" +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q31) +#include "arm_sin_q31.c" +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q31) +#include "arm_sqrt_q31.c" +#endif + +#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q15) +#include "arm_sqrt_q15.c" +#endif + +#endif + +#include "arm_vexp_f32.c" +#include "arm_vexp_f64.c" +#include "arm_vlog_f32.c" +#include "arm_vlog_f64.c" +#include "arm_divide_q15.c" +#include "arm_divide_q31.c" +#include "arm_vlog_q31.c" +#include "arm_vlog_q15.c" +#include "arm_atan2_f32.c" +#include "arm_atan2_q31.c" +#include "arm_atan2_q15.c" diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctionsF16.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctionsF16.c new file mode 100644 index 0000000..79c731f --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctionsF16.c @@ -0,0 +1,32 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: FastMathFunctions.c + * Description: Combination of all fast math function source files. + * + * $Date: 16. March 2020 + * $Revision: V1.1.0 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_vexp_f16.c" +#include "arm_vlog_f16.c" +#include "arm_vinverse_f16.c" +#include "arm_atan2_f16.c" diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f16.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f16.c new file mode 100644 index 0000000..b4a03a4 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f16.c @@ -0,0 +1,171 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_atan2_f16.c + * Description: float16 Arc tangent of y/x + * + * $Date: 22 April 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +/* + +atan for argument between in [0, 1.0] + + +*/ + +#define PIF16 3.14f16 +#define PI16HALF 1.571f16 + +#define ATANHALFF16 0.463648f16 + +#define ATAN2_NB_COEFS_F16 5 + +static const float16_t atan2_coefs_f16[ATAN2_NB_COEFS_F16]={0.f16 +,1.f16 +,0.f16 +,-0.367f16 +,0.152f16 +}; + +__STATIC_FORCEINLINE float16_t arm_atan_limited_f16(float16_t x) +{ + float16_t res=atan2_coefs_f16[ATAN2_NB_COEFS_F16-1]; + int i=1; + for(i=1;i<ATAN2_NB_COEFS_F16;i++) + { + res = (_Float16)x*(_Float16)res + (_Float16)atan2_coefs_f16[ATAN2_NB_COEFS_F16-1-i]; + } + + + return(res); +} + +__STATIC_FORCEINLINE float16_t arm_atan_f16(float16_t x) +{ + int sign=0; + float16_t res=0.0f16; + + if ((_Float16)x < 0.0f16) + { + sign=1; + x=-(_Float16)x; + } + + if ((_Float16)x > 1.0f16) + { + x = 1.0f16 / (_Float16)x; + res = (_Float16)PI16HALF - (_Float16)arm_atan_limited_f16(x); + } + else + { + res += (_Float16)arm_atan_limited_f16(x); + } + + + if (sign) + { + res = -(_Float16)res; + } + + return(res); +} + +/** + @ingroup groupFastMath + */ + + +/** + @addtogroup atan2 + @{ + */ + +/** + @brief Arc Tangent of y/x using sign of y and x to get right quadrant + @param[in] y y coordinate + @param[in] x x coordinate + @param[out] result Result + @return error status. + + @par Compute the Arc tangent of y/x: + The sign of y and x are used to determine the right quadrant + and compute the right angle. + +*/ +arm_status arm_atan2_f16(float16_t y,float16_t x,float16_t *result) +{ + if ((_Float16)x > 0.0f16) + { + *result=arm_atan_f16((_Float16)y/(_Float16)x); + return(ARM_MATH_SUCCESS); + } + if ((_Float16)x < 0.0f16) + { + if ((_Float16)y > 0.0f16) + { + *result=(_Float16)arm_atan_f16((_Float16)y/(_Float16)x) + (_Float16)PIF16; + } + else if ((_Float16)y < 0.0f16) + { + *result=(_Float16)arm_atan_f16((_Float16)y/(_Float16)x) - (_Float16)PIF16; + } + else + { + if (signbit(y)) + { + *result= -(_Float16)PIF16; + } + else + { + *result= PIF16; + } + } + return(ARM_MATH_SUCCESS); + } + if ((_Float16)x == 0.0f16) + { + if ((_Float16)y > 0.0f16) + { + *result=PI16HALF; + return(ARM_MATH_SUCCESS); + } + if ((_Float16)y < 0.0f16) + { + *result=-(_Float16)PI16HALF; + return(ARM_MATH_SUCCESS); + } + } + + + return(ARM_MATH_NANINF); + +} + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ +/** + @} end of atan2 group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f32.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f32.c new file mode 100644 index 0000000..3e398f7 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_f32.c @@ -0,0 +1,183 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_atan2_f32.c + * Description: float32 Arc tangent of y/x + * + * $Date: 22 April 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" + +/* + +atan for argument between in [0, 1.0] + + +*/ + +#define ATANHALFF32 0.463648f +#define PIHALFF32 1.5707963267948966192313f + +#define ATAN2_NB_COEFS_F32 10 + +static const float32_t atan2_coefs_f32[ATAN2_NB_COEFS_F32]={0.0f +,1.0000001638308195518f +,-0.0000228941363602264f +,-0.3328086544578890873f +,-0.004404814619311061f +,0.2162217461808173258f +,-0.0207504842057097504f +,-0.1745263362250363339f +,0.1340557235283553386f +,-0.0323664125927477625f +}; + +__STATIC_FORCEINLINE float32_t arm_atan_limited_f32(float32_t x) +{ + float32_t res=atan2_coefs_f32[ATAN2_NB_COEFS_F32-1]; + int i=1; + for(i=1;i<ATAN2_NB_COEFS_F32;i++) + { + res = x*res + atan2_coefs_f32[ATAN2_NB_COEFS_F32-1-i]; + } + + + return(res); +} + +__STATIC_FORCEINLINE float32_t arm_atan_f32(float32_t x) +{ + int sign=0; + float32_t res=0.0f; + + if (x < 0.0f) + { + sign=1; + x=-x; + } + + if (x > 1.0f) + { + x = 1.0f / x; + res = PIHALFF32 - arm_atan_limited_f32(x); + } + else + { + res += arm_atan_limited_f32(x); + } + + + if (sign) + { + res = -res; + } + + return(res); +} + + +/** + @ingroup groupFastMath + */ + +/** + @defgroup atan2 ArcTan2 + + Computing Arc tangent only using the ratio y/x is not enough to determine the angle + since there is an indeterminacy. Opposite quadrants are giving the same ratio. + + ArcTan2 is not using y/x to compute the angle but y and x and use the sign of y and x + to determine the quadrant. + + */ + +/** + @addtogroup atan2 + @{ + */ + +/** + @brief Arc Tangent of y/x using sign of y and x to get right quadrant + @param[in] y y coordinate + @param[in] x x coordinate + @param[out] result Result + @return error status. + + @par Compute the Arc tangent of y/x: + The sign of y and x are used to determine the right quadrant + and compute the right angle. +*/ + + +arm_status arm_atan2_f32(float32_t y,float32_t x,float32_t *result) +{ + if (x > 0.0f) + { + *result=arm_atan_f32(y/x); + return(ARM_MATH_SUCCESS); + } + if (x < 0.0f) + { + if (y > 0.0f) + { + *result=arm_atan_f32(y/x) + PI; + } + else if (y < 0.0f) + { + *result=arm_atan_f32(y/x) - PI; + } + else + { + if (signbit(y)) + { + *result= -PI; + } + else + { + *result= PI; + } + } + return(ARM_MATH_SUCCESS); + } + if (x == 0.0f) + { + if (y > 0.0f) + { + *result=PIHALFF32; + return(ARM_MATH_SUCCESS); + } + if (y < 0.0f) + { + *result=-PIHALFF32; + return(ARM_MATH_SUCCESS); + } + } + + + return(ARM_MATH_NANINF); + +} + +/** + @} end of atan2 group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q15.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q15.c new file mode 100644 index 0000000..ddf98d8 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q15.c @@ -0,0 +1,201 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_atan2_q15.c + * Description: float32 Arc tangent of y/x + * + * $Date: 22 April 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "dsp/utils.h" + +/* + +atan for argument between in [0, 1.0] + +*/ + + +/* Q2.13 */ +#define ATANHALFQ13 0xed6 +#define PIHALFQ13 0x3244 +#define PIQ13 0x6488 + +#define ATAN2_NB_COEFS_Q15 10 + +static const q15_t atan2_coefs_q15[ATAN2_NB_COEFS_Q15]={0x0000 +,0x7fff +,0xffff +,0xd567 +,0xff70 +,0x1bad +,0xfd58 +,0xe9a9 +,0x1129 +,0xfbdb +}; + +__STATIC_FORCEINLINE q15_t arm_atan_limited_q15(q15_t x) +{ + q31_t res=(q31_t)atan2_coefs_q15[ATAN2_NB_COEFS_Q15-1]; + int i=1; + for(i=1;i<ATAN2_NB_COEFS_Q15;i++) + { + res = ((q31_t) x * res) >> 15U; + res = res + ((q31_t) atan2_coefs_q15[ATAN2_NB_COEFS_Q15-1-i]) ; + } + + res = __SSAT(res>>2,16); + + + return(res); +} + + +__STATIC_FORCEINLINE q15_t arm_atan_q15(q15_t y,q15_t x) +{ + int sign=0; + q15_t res=0; + + if (y<0) + { + arm_negate_q15(&y,&y,1); + sign=1-sign; + } + + if (x < 0) + { + sign=1 - sign; + arm_negate_q15(&x,&x,1); + } + + if (y > x) + { + q15_t ratio; + int16_t shift; + + arm_divide_q15(x,y,&ratio,&shift); + + arm_shift_q15(&ratio,shift,&ratio,1); + + res = PIHALFQ13 - arm_atan_limited_q15(ratio); + + } + else + { + q15_t ratio; + int16_t shift; + + arm_divide_q15(y,x,&ratio,&shift); + + arm_shift_q15(&ratio,shift,&ratio,1); + + res = arm_atan_limited_q15(ratio); + + } + + + if (sign) + { + arm_negate_q15(&res,&res,1); + } + + return(res); +} + + +/** + @ingroup groupFastMath + */ + + +/** + @addtogroup atan2 + @{ + */ + +/** + @brief Arc Tangent of y/x using sign of y and x to get right quadrant + @param[in] y y coordinate + @param[in] x x coordinate + @param[out] result Result in Q2.13 + @return error status. + + @par Compute the Arc tangent of y/x: + The sign of y and x are used to determine the right quadrant + and compute the right angle. +*/ + + +arm_status arm_atan2_q15(q15_t y,q15_t x,q15_t *result) +{ + if (x > 0) + { + *result=arm_atan_q15(y,x); + return(ARM_MATH_SUCCESS); + } + if (x < 0) + { + if (y > 0) + { + *result=arm_atan_q15(y,x) + PIQ13; + } + else if (y < 0) + { + *result=arm_atan_q15(y,x) - PIQ13; + } + else + { + if (y<0) + { + *result= -PIQ13; + } + else + { + *result= PIQ13; + } + } + return(ARM_MATH_SUCCESS); + } + if (x == 0) + { + if (y > 0) + { + *result=PIHALFQ13; + return(ARM_MATH_SUCCESS); + } + if (y < 0) + { + *result=-PIHALFQ13; + return(ARM_MATH_SUCCESS); + } + } + + + return(ARM_MATH_NANINF); + +} + +/** + @} end of atan2 group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q31.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q31.c new file mode 100644 index 0000000..1c83e43 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_atan2_q31.c @@ -0,0 +1,202 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_atan2_q31.c + * Description: float32 Arc tangent of y/x + * + * $Date: 22 April 2022 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "dsp/utils.h" + +/* + +atan for argument between in [0, 1.0] + +*/ + + +/* Q2.29 */ +#define ATANHALF_Q29 0xed63383 +#define PIHALF_Q29 0x3243f6a9 +#define PIQ29 0x6487ed51 + +#define ATAN2_NB_COEFS_Q31 13 + +static const q31_t atan2_coefs_q31[ATAN2_NB_COEFS_Q31]={0x00000000 +,0x7ffffffe +,0x000001b6 +,0xd555158e +,0x00036463 +,0x1985f617 +,0x001992ae +,0xeed53a7f +,0xf8f15245 +,0x2215a3a4 +,0xe0fab004 +,0x0cdd4825 +,0xfddbc054 +}; + + +__STATIC_FORCEINLINE q31_t arm_atan_limited_q31(q31_t x) +{ + q63_t res=(q63_t)atan2_coefs_q31[ATAN2_NB_COEFS_Q31-1]; + int i=1; + for(i=1;i<ATAN2_NB_COEFS_Q31;i++) + { + res = ((q63_t) x * res) >> 31U; + res = res + ((q63_t) atan2_coefs_q31[ATAN2_NB_COEFS_Q31-1-i]) ; + } + + return(clip_q63_to_q31(res>>2)); +} + + +__STATIC_FORCEINLINE q31_t arm_atan_q31(q31_t y,q31_t x) +{ + int sign=0; + q31_t res=0; + + if (y<0) + { + arm_negate_q31(&y,&y,1); + sign=1-sign; + } + + if (x < 0) + { + sign=1 - sign; + arm_negate_q31(&x,&x,1); + } + + if (y > x) + { + q31_t ratio; + int16_t shift; + + arm_divide_q31(x,y,&ratio,&shift); + + arm_shift_q31(&ratio,shift,&ratio,1); + + res = PIHALF_Q29 - arm_atan_limited_q31(ratio); + + } + else + { + q31_t ratio; + int16_t shift; + + arm_divide_q31(y,x,&ratio,&shift); + + arm_shift_q31(&ratio,shift,&ratio,1); + + res = arm_atan_limited_q31(ratio); + + } + + + if (sign) + { + arm_negate_q31(&res,&res,1); + } + + return(res); +} + + +/** + @ingroup groupFastMath + */ + + +/** + @addtogroup atan2 + @{ + */ + +/** + @brief Arc Tangent of y/x using sign of y and x to get right quadrant + @param[in] y y coordinate + @param[in] x x coordinate + @param[out] result Result in Q2.29 + @return error status. + + @par Compute the Arc tangent of y/x: + The sign of y and x are used to determine the right quadrant + and compute the right angle. +*/ + + +arm_status arm_atan2_q31(q31_t y,q31_t x,q31_t *result) +{ + if (x > 0) + { + *result=arm_atan_q31(y,x); + return(ARM_MATH_SUCCESS); + } + if (x < 0) + { + if (y > 0) + { + *result=arm_atan_q31(y,x) + PIQ29; + } + else if (y < 0) + { + *result=arm_atan_q31(y,x) - PIQ29; + } + else + { + if (y<0) + { + *result= -PIQ29; + } + else + { + *result= PIQ29; + } + } + return(ARM_MATH_SUCCESS); + } + if (x == 0) + { + if (y > 0) + { + *result=PIHALF_Q29; + return(ARM_MATH_SUCCESS); + } + if (y < 0) + { + *result=-PIHALF_Q29; + return(ARM_MATH_SUCCESS); + } + } + + + return(ARM_MATH_NANINF); + +} + +/** + @} end of atan2 group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_cos_f32.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_cos_f32.c new file mode 100644 index 0000000..de037c8 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_cos_f32.c @@ -0,0 +1,121 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cos_f32.c + * Description: Fast cosine calculation for floating-point values + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +/** + @ingroup groupFastMath + */ + +/** + @defgroup cos Cosine + + Computes the trigonometric cosine function using a combination of table lookup + and linear interpolation. There are separate functions for + Q15, Q31, and floating-point data types. + The input to the floating-point version is in radians while the + fixed-point Q15 and Q31 have a scaled input with the range + [0 +0.9999] mapping to [0 2*pi). The fixed-point range is chosen so that a + value of 2*pi wraps around to 0. + + The implementation is based on table lookup using 512 values together with linear interpolation. + The steps used are: + -# Calculation of the nearest integer table index + -# Compute the fractional portion (fract) of the table index. + -# The final result equals <code>(1.0f-fract)*a + fract*b;</code> + + where + <pre> + a = Table[index]; + b = Table[index+1]; + </pre> + */ + +/** + @addtogroup cos + @{ + */ + +/** + @brief Fast approximation to the trigonometric cosine function for floating-point data. + @param[in] x input value in radians + @return cos(x) + */ +float32_t arm_cos_f32( + float32_t x) +{ + float32_t cosVal, fract, in; /* Temporary input, output variables */ + uint16_t index; /* Index variable */ + float32_t a, b; /* Two nearest output values */ + int32_t n; + float32_t findex; + + /* input x is in radians */ + /* Scale input to [0 1] range from [0 2*PI] , divide input by 2*pi, add 0.25 (pi/2) to read sine table */ + in = x * 0.159154943092f + 0.25f; + + /* Calculation of floor value of input */ + n = (int32_t) in; + + /* Make negative values towards -infinity */ + if (in < 0.0f) + { + n--; + } + + /* Map input value to [0 1] */ + in = in - (float32_t) n; + + /* Calculation of index of the table */ + findex = (float32_t)FAST_MATH_TABLE_SIZE * in; + index = (uint16_t)findex; + + /* when "in" is exactly 1, we need to rotate the index down to 0 */ + if (index >= FAST_MATH_TABLE_SIZE) { + index = 0; + findex -= (float32_t)FAST_MATH_TABLE_SIZE; + } + + /* fractional value calculation */ + fract = findex - (float32_t) index; + + /* Read two nearest values of input value from the cos table */ + a = sinTable_f32[index]; + b = sinTable_f32[index+1]; + + /* Linear interpolation process */ + cosVal = (1.0f - fract) * a + fract * b; + + /* Return output value */ + return (cosVal); +} + +/** + @} end of cos group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q15.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q15.c new file mode 100644 index 0000000..6b552f1 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q15.c @@ -0,0 +1,84 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cos_q15.c + * Description: Fast cosine calculation for Q15 values + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +/** + @ingroup groupFastMath + */ + +/** + @addtogroup cos + @{ + */ + +/** + @brief Fast approximation to the trigonometric cosine function for Q15 data. + @param[in] x Scaled input value in radians + @return cos(x) + + The Q15 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI). + */ + +q15_t arm_cos_q15( + q15_t x) +{ + q15_t cosVal; /* Temporary input, output variables */ + int32_t index; /* Index variable */ + q15_t a, b; /* Two nearest output values */ + q15_t fract; /* Temporary values for fractional values */ + + /* add 0.25 (pi/2) to read sine table */ + x = (uint16_t)x + 0x2000; + if (x < 0) + { /* convert negative numbers to corresponding positive ones */ + x = (uint16_t)x + 0x8000; + } + + /* Calculate the nearest index */ + index = (uint32_t)x >> FAST_MATH_Q15_SHIFT; + + /* Calculation of fractional value */ + fract = (x - (index << FAST_MATH_Q15_SHIFT)) << 9; + + /* Read two nearest values of input value from the sin table */ + a = sinTable_q15[index]; + b = sinTable_q15[index+1]; + + /* Linear interpolation process */ + cosVal = (q31_t) (0x8000 - fract) * a >> 16; + cosVal = (q15_t) ((((q31_t) cosVal << 16) + ((q31_t) fract * b)) >> 16); + + /* Return output value */ + return (cosVal << 1); +} + +/** + @} end of cos group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q31.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q31.c new file mode 100644 index 0000000..8386c8f --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_cos_q31.c @@ -0,0 +1,84 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cos_q31.c + * Description: Fast cosine calculation for Q31 values + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +/** + @ingroup groupFastMath + */ + +/** + @addtogroup cos + @{ + */ + +/** + @brief Fast approximation to the trigonometric cosine function for Q31 data. + @param[in] x Scaled input value in radians + @return cos(x) + + The Q31 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI). + */ + +q31_t arm_cos_q31( + q31_t x) +{ + q31_t cosVal; /* Temporary input, output variables */ + int32_t index; /* Index variable */ + q31_t a, b; /* Two nearest output values */ + q31_t fract; /* Temporary values for fractional values */ + + /* add 0.25 (pi/2) to read sine table */ + x = (uint32_t)x + 0x20000000; + if (x < 0) + { /* convert negative numbers to corresponding positive ones */ + x = (uint32_t)x + 0x80000000; + } + + /* Calculate the nearest index */ + index = (uint32_t)x >> FAST_MATH_Q31_SHIFT; + + /* Calculation of fractional value */ + fract = (x - (index << FAST_MATH_Q31_SHIFT)) << 9; + + /* Read two nearest values of input value from the sin table */ + a = sinTable_q31[index]; + b = sinTable_q31[index+1]; + + /* Linear interpolation process */ + cosVal = (q63_t) (0x80000000 - fract) * a >> 32; + cosVal = (q31_t) ((((q63_t) cosVal << 32) + ((q63_t) fract * b)) >> 32); + + /* Return output value */ + return (cosVal << 1); +} + +/** + @} end of cos group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q15.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q15.c new file mode 100644 index 0000000..9c26e06 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q15.c @@ -0,0 +1,110 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cos_q15.c + * Description: Fast cosine calculation for Q15 values + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +#include <stdlib.h> + +/** + @ingroup groupFastMath + */ + +/** + @defgroup divide Fixed point division + + */ + +/** + @addtogroup divide + @{ + */ + +/** + @brief Fixed point division + @param[in] numerator Numerator + @param[in] denominator Denominator + @param[out] quotient Quotient value normalized between -1.0 and 1.0 + @param[out] shift Shift left value to get the unnormalized quotient + @return error status + + When dividing by 0, an error ARM_MATH_NANINF is returned. And the quotient is forced + to the saturated negative or positive value. + */ + +arm_status arm_divide_q15(q15_t numerator, + q15_t denominator, + q15_t *quotient, + int16_t *shift) +{ + int16_t sign=0; + q31_t temp; + int16_t shiftForNormalizing; + + *shift = 0; + + sign = (numerator>>15) ^ (denominator>>15); + + if (denominator == 0) + { + if (sign) + { + *quotient = 0x8000; + } + else + { + *quotient = 0x7FFF; + } + return(ARM_MATH_NANINF); + } + + arm_abs_q15(&numerator,&numerator,1); + arm_abs_q15(&denominator,&denominator,1); + + temp = ((q31_t)numerator << 15) / ((q31_t)denominator); + + shiftForNormalizing= 17 - __CLZ(temp); + if (shiftForNormalizing > 0) + { + *shift = shiftForNormalizing; + temp = temp >> shiftForNormalizing; + } + + if (sign) + { + temp = -temp; + } + + *quotient=temp; + + return(ARM_MATH_SUCCESS); +} + +/** + @} end of divide group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q31.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q31.c new file mode 100644 index 0000000..12b727f --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_divide_q31.c @@ -0,0 +1,110 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cos_q31.c + * Description: Fast cosine calculation for Q31 values + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +#include <stdlib.h> + +/** + @ingroup groupFastMath + */ + +/** + @defgroup divide Fixed point division + + */ + +/** + @addtogroup divide + @{ + */ + +/** + @brief Fixed point division + @param[in] numerator Numerator + @param[in] denominator Denominator + @param[out] quotient Quotient value normalized between -1.0 and 1.0 + @param[out] shift Shift left value to get the unnormalized quotient + @return error status + + When dividing by 0, an error ARM_MATH_NANINF is returned. And the quotient is forced + to the saturated negative or positive value. + */ + +arm_status arm_divide_q31(q31_t numerator, + q31_t denominator, + q31_t *quotient, + int16_t *shift) +{ + int16_t sign=0; + q63_t temp; + int16_t shiftForNormalizing; + + *shift = 0; + + sign = (numerator>>31) ^ (denominator>>31); + + if (denominator == 0) + { + if (sign) + { + *quotient = 0x80000000; + } + else + { + *quotient = 0x7FFFFFFF; + } + return(ARM_MATH_NANINF); + } + + arm_abs_q31(&numerator,&numerator,1); + arm_abs_q31(&denominator,&denominator,1); + + temp = ((q63_t)numerator << 31) / ((q63_t)denominator); + + shiftForNormalizing= 32 - __CLZ(temp >> 31); + if (shiftForNormalizing > 0) + { + *shift = shiftForNormalizing; + temp = temp >> shiftForNormalizing; + } + + if (sign) + { + temp = -temp; + } + + *quotient=(q31_t)temp; + + return(ARM_MATH_SUCCESS); +} + +/** + @} end of divide group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sin_f32.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sin_f32.c new file mode 100644 index 0000000..2fcd6bb --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sin_f32.c @@ -0,0 +1,122 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_sin_f32.c + * Description: Fast sine calculation for floating-point values + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +/** + @ingroup groupFastMath + */ + +/** + @defgroup sin Sine + + Computes the trigonometric sine function using a combination of table lookup + and linear interpolation. There are separate functions for + Q15, Q31, and floating-point data types. + The input to the floating-point version is in radians while the + fixed-point Q15 and Q31 have a scaled input with the range + [0 +0.9999] mapping to [0 2*pi). The fixed-point range is chosen so that a + value of 2*pi wraps around to 0. + + The implementation is based on table lookup using 512 values together with linear interpolation. + The steps used are: + -# Calculation of the nearest integer table index + -# Compute the fractional portion (fract) of the table index. + -# The final result equals <code>(1.0f-fract)*a + fract*b;</code> + + where + <pre> + b = Table[index]; + c = Table[index+1]; + </pre> + */ + +/** + @addtogroup sin + @{ + */ + +/** + @brief Fast approximation to the trigonometric sine function for floating-point data. + @param[in] x input value in radians. + @return sin(x) + */ + +float32_t arm_sin_f32( + float32_t x) +{ + float32_t sinVal, fract, in; /* Temporary input, output variables */ + uint16_t index; /* Index variable */ + float32_t a, b; /* Two nearest output values */ + int32_t n; + float32_t findex; + + /* input x is in radians */ + /* Scale input to [0 1] range from [0 2*PI] , divide input by 2*pi */ + in = x * 0.159154943092f; + + /* Calculation of floor value of input */ + n = (int32_t) in; + + /* Make negative values towards -infinity */ + if (in < 0.0f) + { + n--; + } + + /* Map input value to [0 1] */ + in = in - (float32_t) n; + + /* Calculation of index of the table */ + findex = (float32_t)FAST_MATH_TABLE_SIZE * in; + index = (uint16_t)findex; + + /* when "in" is exactly 1, we need to rotate the index down to 0 */ + if (index >= FAST_MATH_TABLE_SIZE) { + index = 0; + findex -= (float32_t)FAST_MATH_TABLE_SIZE; + } + + /* fractional value calculation */ + fract = findex - (float32_t) index; + + /* Read two nearest values of input value from the sin table */ + a = sinTable_f32[index]; + b = sinTable_f32[index+1]; + + /* Linear interpolation process */ + sinVal = (1.0f - fract) * a + fract * b; + + /* Return output value */ + return (sinVal); +} + +/** + @} end of sin group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q15.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q15.c new file mode 100644 index 0000000..81f0d62 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q15.c @@ -0,0 +1,83 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_sin_q15.c + * Description: Fast sine calculation for Q15 values + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +/** + @ingroup groupFastMath + */ + +/** + @addtogroup sin + @{ + */ + +/** + @brief Fast approximation to the trigonometric sine function for Q15 data. + @param[in] x Scaled input value in radians + @return sin(x) + + The Q15 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI). + */ + +q15_t arm_sin_q15( + q15_t x) +{ + q15_t sinVal; /* Temporary input, output variables */ + int32_t index; /* Index variable */ + q15_t a, b; /* Two nearest output values */ + q15_t fract; /* Temporary values for fractional values */ + + + if (x < 0) + { /* convert negative numbers to corresponding positive ones */ + x = (uint16_t)x + 0x8000; + } + + /* Calculate the nearest index */ + index = (uint32_t)x >> FAST_MATH_Q15_SHIFT; + + /* Calculation of fractional value */ + fract = (x - (index << FAST_MATH_Q15_SHIFT)) << 9; + + /* Read two nearest values of input value from the sin table */ + a = sinTable_q15[index]; + b = sinTable_q15[index+1]; + + /* Linear interpolation process */ + sinVal = (q31_t) (0x8000 - fract) * a >> 16; + sinVal = (q15_t) ((((q31_t) sinVal << 16) + ((q31_t) fract * b)) >> 16); + + /* Return output value */ + return (sinVal << 1); +} + +/** + @} end of sin group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q31.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q31.c new file mode 100644 index 0000000..06262fd --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sin_q31.c @@ -0,0 +1,82 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_sin_q31.c + * Description: Fast sine calculation for Q31 values + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +/** + @ingroup groupFastMath + */ + +/** + @addtogroup sin + @{ + */ + +/** + @brief Fast approximation to the trigonometric sine function for Q31 data. + @param[in] x Scaled input value in radians + @return sin(x) + + The Q31 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI). + */ + +q31_t arm_sin_q31( + q31_t x) +{ + q31_t sinVal; /* Temporary variables for input, output */ + int32_t index; /* Index variable */ + q31_t a, b; /* Two nearest output values */ + q31_t fract; /* Temporary values for fractional values */ + + if (x < 0) + { /* convert negative numbers to corresponding positive ones */ + x = (uint32_t)x + 0x80000000; + } + + /* Calculate the nearest index */ + index = (uint32_t)x >> FAST_MATH_Q31_SHIFT; + + /* Calculation of fractional value */ + fract = (x - (index << FAST_MATH_Q31_SHIFT)) << 9; + + /* Read two nearest values of input value from the sin table */ + a = sinTable_q31[index]; + b = sinTable_q31[index+1]; + + /* Linear interpolation process */ + sinVal = (q63_t) (0x80000000 - fract) * a >> 32; + sinVal = (q31_t) ((((q63_t) sinVal << 32) + ((q63_t) fract * b)) >> 32); + + /* Return output value */ + return (sinVal << 1); +} + +/** + @} end of sin group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q15.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q15.c new file mode 100644 index 0000000..defeebe --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q15.c @@ -0,0 +1,123 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_sqrt_q15.c + * Description: Q15 square root function + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +/** + @ingroup groupFastMath + */ + +/** + @addtogroup SQRT + @{ + */ + +/** + @brief Q15 square root function. + @param[in] in input value. The range of the input value is [0 +1) or 0x0000 to 0x7FFF + @param[out] pOut points to square root of input value + @return execution status + - \ref ARM_MATH_SUCCESS : input value is positive + - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 + */ + +#define Q12QUARTER 0x2000 +arm_status arm_sqrt_q15( + q15_t in, + q15_t * pOut) +{ + q15_t number, var1, signBits1,temp; + + number = in; + + /* If the input is a positive number then compute the signBits. */ + if (number > 0) + { + signBits1 = __CLZ(number) - 17; + + /* Shift by the number of signBits1 */ + if ((signBits1 % 2) == 0) + { + number = number << signBits1; + } + else + { + number = number << (signBits1 - 1); + } + /* Start value for 1/sqrt(x) for the Newton iteration */ + var1 = sqrt_initial_lut_q15[(number>> 11) - (Q12QUARTER >> 11)]; + + /* 0.5 var1 * (3 - number * var1 * var1) */ + /* 1st iteration */ + + temp = ((q31_t) var1 * var1) >> 12; + temp = ((q31_t) number * temp) >> 15; + temp = 0x3000 - temp; + var1 = ((q31_t) var1 * temp) >> 13; + + temp = ((q31_t) var1 * var1) >> 12; + temp = ((q31_t) number * temp) >> 15; + temp = 0x3000 - temp; + var1 = ((q31_t) var1 * temp) >> 13; + + temp = ((q31_t) var1 * var1) >> 12; + temp = ((q31_t) number * temp) >> 15; + temp = 0x3000 - temp; + var1 = ((q31_t) var1 * temp) >> 13; + + /* Multiply the inverse square root with the original value */ + + var1 = ((q15_t) (((q31_t) number * var1) >> 12)); + + /* Shift the output down accordingly */ + if ((signBits1 % 2) == 0) + { + var1 = var1 >> (signBits1 / 2); + } + else + { + var1 = var1 >> ((signBits1 - 1) / 2); + } + *pOut = var1; + + + return (ARM_MATH_SUCCESS); + } + /* If the number is a negative number then store zero as its square root value */ + else + { + *pOut = 0; + + return (ARM_MATH_ARGUMENT_ERROR); + } +} + +/** + @} end of SQRT group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q31.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q31.c new file mode 100644 index 0000000..8747670 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_sqrt_q31.c @@ -0,0 +1,126 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_sqrt_q31.c + * Description: Q31 square root function + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +/** + @ingroup groupFastMath + */ + +/** + @addtogroup SQRT + @{ + */ + +/** + @brief Q31 square root function. + @param[in] in input value. The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF + @param[out] pOut points to square root of input value + @return execution status + - \ref ARM_MATH_SUCCESS : input value is positive + - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0 + */ +#define Q28QUARTER 0x20000000 + +arm_status arm_sqrt_q31( + q31_t in, + q31_t * pOut) +{ + q31_t number, var1, signBits1 ,temp; + + number = in; + + /* If the input is a positive number then compute the signBits. */ + if (number > 0) + { + signBits1 = __CLZ(number) - 1; + + /* Shift by the number of signBits1 */ + if ((signBits1 % 2) == 0) + { + number = number << signBits1; + } + else + { + number = number << (signBits1 - 1); + } + + /* Start value for 1/sqrt(x) for the Newton iteration */ + var1 = sqrt_initial_lut_q31[(number>> 26) - (Q28QUARTER >> 26)]; + + /* 0.5 var1 * (3 - number * var1 * var1) */ + + /* 1st iteration */ + + temp = ((q63_t) var1 * var1) >> 28; + temp = ((q63_t) number * temp) >> 31; + temp = 0x30000000 - temp; + var1 = ((q63_t) var1 * temp) >> 29; + + + /* 2nd iteration */ + temp = ((q63_t) var1 * var1) >> 28; + temp = ((q63_t) number * temp) >> 31; + temp = 0x30000000 - temp; + var1 = ((q63_t) var1 * temp) >> 29; + + /* 3nd iteration */ + temp = ((q63_t) var1 * var1) >> 28; + temp = ((q63_t) number * temp) >> 31; + temp = 0x30000000 - temp; + var1 = ((q63_t) var1 * temp) >> 29; + + /* Multiply the inverse square root with the original value */ + var1 = ((q31_t) (((q63_t) number * var1) >> 28)); + + /* Shift the output down accordingly */ + if ((signBits1 % 2) == 0) + { + var1 = var1 >> (signBits1 / 2); + } + else + { + var1 = var1 >> ((signBits1 - 1) / 2); + } + *pOut = var1; + + return (ARM_MATH_SUCCESS); + } + /* If the number is a negative number then store zero as its square root value */ + else + { + *pOut = 0; + + return (ARM_MATH_ARGUMENT_ERROR); + } +} + +/** + @} end of SQRT group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c new file mode 100644 index 0000000..dd7c06e --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f16.c @@ -0,0 +1,82 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vlog_f16.c + * Description: Fast vectorized log + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include "arm_common_tables.h" + +#include "arm_vec_math_f16.h" + + +void arm_vexp_f16( + const float16_t * pSrc, + float16_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + + f16x8_t src; + f16x8_t dst; + + blkCnt = blockSize >> 3; + + while (blkCnt > 0U) + { + src = vld1q(pSrc); + dst = vexpq_f16(src); + vst1q(pDst, dst); + + pSrc += 8; + pDst += 8; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 7; +#else + blkCnt = blockSize; +#endif + + while (blkCnt > 0U) + { + /* C = log(A) */ + + /* Calculate log and store result in destination buffer. */ + *pDst++ = (_Float16)expf((float32_t)*pSrc++); + + /* Decrement loop counter */ + blkCnt--; + } +} + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f32.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f32.c new file mode 100644 index 0000000..43b2a86 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f32.c @@ -0,0 +1,97 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vlog_f32.c + * Description: Fast vectorized log + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include "arm_vec_math.h" +#endif + +void arm_vexp_f32( + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) + f32x4_t src; + f32x4_t dst; + + blkCnt = blockSize >> 2; + + while (blkCnt > 0U) + { + src = vld1q(pSrc); + dst = vexpq_f32(src); + vst1q(pDst, dst); + + pSrc += 4; + pDst += 4; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 3; +#else +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) + f32x4_t src; + f32x4_t dst; + + blkCnt = blockSize >> 2; + + while (blkCnt > 0U) + { + src = vld1q_f32(pSrc); + dst = vexpq_f32(src); + vst1q_f32(pDst, dst); + + pSrc += 4; + pDst += 4; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 3; +#else + blkCnt = blockSize; +#endif +#endif + + while (blkCnt > 0U) + { + /* C = log(A) */ + + /* Calculate log and store result in destination buffer. */ + *pDst++ = expf(*pSrc++); + + /* Decrement loop counter */ + blkCnt--; + } +} diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f64.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f64.c new file mode 100644 index 0000000..76f80ff --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vexp_f64.c @@ -0,0 +1,51 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vlog_f64.c + * Description: Fast vectorized log + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +void arm_vexp_f64( + const float64_t * pSrc, + float64_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = log(A) */ + + /* Calculate log and store result in destination buffer. */ + *pDst++ = exp(*pSrc++); + + /* Decrement loop counter */ + blkCnt--; + } +} diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c new file mode 100644 index 0000000..1067d02 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vinverse_f16.c @@ -0,0 +1,79 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vinverse_f16.c + * Description: Fast vectorized inverse + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +#include "arm_common_tables.h" + +#include "arm_vec_math_f16.h" + +void arm_vinverse_f16( + const float16_t * pSrc, + float16_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + + f16x8_t src; + f16x8_t dst; + + blkCnt = blockSize >> 3; + + while (blkCnt > 0U) + { + src = vld1q(pSrc); + dst = vrecip_hiprec_f16(src); + vst1q(pDst, dst); + + pSrc += 8; + pDst += 8; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 7; +#else + blkCnt = blockSize; +#endif + + while (blkCnt > 0U) + { + + *pDst++ = 1.0f16 / (_Float16)*pSrc++; + + /* Decrement loop counter */ + blkCnt--; + } +} + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ + diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c new file mode 100644 index 0000000..0821d5a --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f16.c @@ -0,0 +1,222 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vlog_f16.c + * Description: Fast vectorized log + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions_f16.h" +#include "dsp/support_functions_f16.h" + +#if defined(ARM_FLOAT16_SUPPORTED) + +/* Degree of the polynomial approximation */ +#define NB_DEG_LOGF16 3 + +/* +Related to the Log2 of the number of approximations. +For instance, with 3 there are 1 + 2^3 polynomials +*/ +#define NB_DIV_LOGF16 3 + +/* Length of the LUT table */ +#define NB_LUT_LOGF16 (NB_DEG_LOGF16+1)*(1 + (1<<NB_DIV_LOGF16)) + + +/* + +LUT of polynomial approximations. + +Could be generated with: + +ClearAll[lut, coefs, nb, deg]; +nb = 3; +deg = 3; +lut = Table[ + MiniMaxApproximation[ + Log[x/2^nb + i], {x, {10^-6, 1.0/2^nb}, deg, 0}, + MaxIterations -> 1000][[2, 1]], {i, 1, 2, (1.0/2^nb)}]; +coefs = Chop@Flatten[CoefficientList[lut, x]]; + +*/ +static float16_t lut_logf16[NB_LUT_LOGF16]={ + 0,0.125,-0.00781197,0.00063974,0.117783, + 0.111111,-0.00617212,0.000447935,0.223144, + 0.1,-0.00499952,0.000327193,0.318454,0.0909091, + -0.00413191,0.000246234,0.405465,0.0833333, + -0.00347199,0.000189928,0.485508,0.0769231, + -0.00295841,0.00014956,0.559616,0.0714286, + -0.0025509,0.000119868,0.628609,0.0666667, + -0.00222213,0.0000975436,0.693147, + 0.0625,-0.00195305,0.0000804357}; + + +float16_t logf16_scalar(float16_t x) +{ + int16_t i = arm_typecast_s16_f16(x); + + int32_t vecExpUnBiased = (i >> 10) - 15; + i = i - (vecExpUnBiased << 10); + float16_t vecTmpFlt1 = arm_typecast_f16_s16(i); + + float16_t *lut; + int n; + float16_t tmp,v; + + tmp = ((_Float16)vecTmpFlt1 - 1.0f16) * (1 << NB_DIV_LOGF16); + n = (int)floor((double)tmp); + v = (_Float16)tmp - (_Float16)n; + + lut = lut_logf16 + n * (1+NB_DEG_LOGF16); + + float16_t res = lut[NB_DEG_LOGF16-1]; + for(int j=NB_DEG_LOGF16-2; j >=0 ; j--) + { + res = (_Float16)lut[j] + (_Float16)v * (_Float16)res; + } + + res = (_Float16)res + 0.693147f16 * (_Float16)vecExpUnBiased; + + + return(res); +} + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_common_tables.h" +#include "arm_vec_math_f16.h" + + +float16x8_t vlogq_lut_f16(float16x8_t vecIn) +{ + int16x8_t i = vreinterpretq_s16_f16(vecIn); + + int16x8_t vecExpUnBiased = vsubq_n_s16(vshrq_n_s16(i,10), 15); + i = vsubq_s16(i,vshlq_n_s16(vecExpUnBiased,10)); + float16x8_t vecTmpFlt1 = vreinterpretq_f16_s16(i); + + + float16x8_t lutV; + int16x8_t n; + int16x8_t offset; + + float16x8_t tmp,v,res; + + tmp = vmulq_n_f16(vsubq_n_f16(vecTmpFlt1,1.0f16),(_Float16)(1 << NB_DIV_LOGF16)); + + n = vcvtq_s16_f16(tmp); + v = vsubq_f16(tmp,vcvtq_f16_s16(n)); + + + offset = vmulq_n_s16(n,(1+NB_DEG_LOGF16)); + offset = vaddq_n_s16(offset,NB_DEG_LOGF16-1); + + res = vldrhq_gather_shifted_offset_f16(lut_logf16,(uint16x8_t)offset); + offset = vsubq_n_s16(offset,1); + + for(int j=NB_DEG_LOGF16-2; j >=0 ; j--) + { + lutV = vldrhq_gather_shifted_offset_f16(lut_logf16,(uint16x8_t)offset); + res = vfmaq_f16(lutV,v,res); + offset = vsubq_n_s16(offset,1); + + } + + res = vfmaq_n_f16(res,vcvtq_f16_s16(vecExpUnBiased),0.693147f16); + + + return(res); + +} + +#endif + +/** + @ingroup groupFastMath + */ + +/** + @addtogroup vlog + @{ + */ + +/** + @brief Floating-point vector of log values. + @param[in] pSrc points to the input vector + @param[out] pDst points to the output vector + @param[in] blockSize number of samples in each vector + @return none + */ + + +void arm_vlog_f16( + const float16_t * pSrc, + float16_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; + +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + f16x8_t src; + f16x8_t dst; + + blkCnt = blockSize >> 3; + + while (blkCnt > 0U) + { + src = vld1q(pSrc); + dst = vlogq_lut_f16(src); + vst1q(pDst, dst); + + pSrc += 8; + pDst += 8; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 7; +#else + blkCnt = blockSize; +#endif + + while (blkCnt > 0U) + { + /* C = log(A) */ + + /* Calculate log and store result in destination buffer. */ + *pDst++ = logf16_scalar(*pSrc++); + + /* Decrement loop counter */ + blkCnt--; + } +} + + + +/** + @} end of vlog group + */ + + +#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f32.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f32.c new file mode 100644 index 0000000..888a11f --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f32.c @@ -0,0 +1,119 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vlog_f32.c + * Description: Fast vectorized log + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + + +/** + @ingroup groupFastMath + */ + + +/** + @defgroup vlog Vector Log + + Compute the log values of a vector of samples. + + */ + +/** + @addtogroup vlog + @{ + */ + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) +#include "arm_vec_math.h" +#endif + +void arm_vlog_f32( + const float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; + +#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE) + f32x4_t src; + f32x4_t dst; + + blkCnt = blockSize >> 2; + + while (blkCnt > 0U) + { + src = vld1q(pSrc); + dst = vlogq_f32(src); + vst1q(pDst, dst); + + pSrc += 4; + pDst += 4; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 3; +#else +#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) + f32x4_t src; + f32x4_t dst; + + blkCnt = blockSize >> 2; + + while (blkCnt > 0U) + { + src = vld1q_f32(pSrc); + dst = vlogq_f32(src); + vst1q_f32(pDst, dst); + + pSrc += 4; + pDst += 4; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 3; +#else + blkCnt = blockSize; +#endif +#endif + + while (blkCnt > 0U) + { + /* C = log(A) */ + + /* Calculate log and store result in destination buffer. */ + *pDst++ = logf(*pSrc++); + + /* Decrement loop counter */ + blkCnt--; + } +} + +/** + @} end of vlog group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f64.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f64.c new file mode 100644 index 0000000..a60fff6 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_f64.c @@ -0,0 +1,51 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vlog_f64.c + * Description: Fast vectorized log + * + * $Date: 13 September 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" +#include "arm_common_tables.h" + +void arm_vlog_f64( + const float64_t * pSrc, + float64_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = log(A) */ + + /* Calculate log and store result in destination buffer. */ + *pDst++ = log(*pSrc++); + + /* Decrement loop counter */ + blkCnt--; + } +} diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q15.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q15.c new file mode 100644 index 0000000..896988d --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q15.c @@ -0,0 +1,264 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vlog_q15 + * Description: Q15 vector log + * + * $Date: 19 July 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include "dsp/fast_math_functions.h" + + +#define LOG_Q15_ACCURACY 15 + +/* Bit to represent the normalization factor + It is Ceiling[Log2[LOG_Q15_ACCURACY]] of the previous value. + The Log2 algorithm is assuming that the value x is + 1 <= x < 2. + + But input value could be as small a 2^-LOG_Q15_ACCURACY + which would give an integer part of -15. +*/ +#define LOG_Q15_INTEGER_PART 4 + +/* 2.0 in q14 */ +#define LOQ_Q15_THRESHOLD (1u << LOG_Q15_ACCURACY) + +/* HALF */ +#define LOQ_Q15_Q16_HALF LOQ_Q15_THRESHOLD +#define LOQ_Q15_Q14_HALF (LOQ_Q15_Q16_HALF >> 2) + + +/* 1.0 / Log2[Exp[1]] in q15 */ +#define LOG_Q15_INVLOG2EXP 0x58b9u + + +/* Clay Turner algorithm */ +static uint16_t arm_scalar_log_q15(uint16_t src) +{ + int i; + + int16_t c = __CLZ(src)-16; + int16_t normalization=0; + + /* 0.5 in q11 */ + uint16_t inc = LOQ_Q15_Q16_HALF >> (LOG_Q15_INTEGER_PART + 1); + + /* Will compute y = log2(x) for 1 <= x < 2.0 */ + uint16_t x; + + /* q11 */ + uint16_t y=0; + + /* q11 */ + int16_t tmp; + + + /* Normalize and convert to q14 format */ + x = src; + if ((c-1) < 0) + { + x = x >> (1-c); + } + else + { + x = x << (c-1); + } + normalization = c; + + + + /* Compute the Log2. Result is in q11 instead of q16 + because we know 0 <= y < 1.0 but + we want a result allowing to do a + product on int16 rather than having to go + through int32 + */ + for(i = 0; i < LOG_Q15_ACCURACY ; i++) + { + x = (((int32_t)x*x)) >> (LOG_Q15_ACCURACY - 1); + + if (x >= LOQ_Q15_THRESHOLD) + { + y += inc ; + x = x >> 1; + } + inc = inc >> 1; + } + + + /* + Convert the Log2 to Log and apply normalization. + We compute (y - normalisation) * (1 / Log2[e]). + + */ + + /* q11 */ + //tmp = y - ((int32_t)normalization << (LOG_Q15_ACCURACY + 1)); + tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART)); + + /* q4.11 */ + y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15; + + return(y); + +} + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + + +q15x8_t vlogq_q15(q15x8_t src) +{ + + int i; + + int16x8_t c = vclzq_s16(src); + int16x8_t normalization = c; + + + /* 0.5 in q11 */ + uint16_t inc = LOQ_Q15_Q16_HALF >> (LOG_Q15_INTEGER_PART + 1); + + /* Will compute y = log2(x) for 1 <= x < 2.0 */ + uint16x8_t x; + + + /* q11 */ + uint16x8_t y = vdupq_n_u16(0); + + + /* q11 */ + int16x8_t vtmp; + + + mve_pred16_t p; + + /* Normalize and convert to q14 format */ + + + vtmp = vsubq_n_s16(c,1); + x = vshlq_u16((uint16x8_t)src,vtmp); + + + /* Compute the Log2. Result is in q11 instead of q16 + because we know 0 <= y < 1.0 but + we want a result allowing to do a + product on int16 rather than having to go + through int32 + */ + for(i = 0; i < LOG_Q15_ACCURACY ; i++) + { + x = vmulhq_u16(x,x); + x = vshlq_n_u16(x,2); + + + p = vcmphiq_u16(x,vdupq_n_u16(LOQ_Q15_THRESHOLD)); + y = vaddq_m_n_u16(y, y,inc,p); + x = vshrq_m_n_u16(x,x,1,p); + + inc = inc >> 1; + } + + + /* + Convert the Log2 to Log and apply normalization. + We compute (y - normalisation) * (1 / Log2[e]). + + */ + + /* q11 */ + // tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART)); + vtmp = vshlq_n_s16(normalization,LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART); + vtmp = vsubq_s16((int16x8_t)y,vtmp); + + + + /* q4.11 */ + // y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15; + vtmp = vqdmulhq_n_s16(vtmp,LOG_Q15_INVLOG2EXP); + + return(vtmp); +} +#endif + +/** + @ingroup groupFastMath + */ + +/** + @addtogroup vlog + @{ + */ + +/** + @brief q15 vector of log values. + @param[in] pSrc points to the input vector in q15 + @param[out] pDst points to the output vector in q4.11 + @param[in] blockSize number of samples in each vector + @return none + + */ + +void arm_vlog_q15( + const q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; /* loop counters */ + + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + q15x8_t src; + q15x8_t dst; + + blkCnt = blockSize >> 3; + + while (blkCnt > 0U) + { + src = vld1q(pSrc); + dst = vlogq_q15(src); + vst1q(pDst, dst); + + pSrc += 8; + pDst += 8; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 7; + #else + blkCnt = blockSize; + #endif + + while (blkCnt > 0U) + { + *pDst++ = arm_scalar_log_q15(*pSrc++); + + /* Decrement loop counter */ + blkCnt--; + } +} + +/** + @} end of vlog group + */ diff --git a/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q31.c b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q31.c new file mode 100644 index 0000000..0053859 --- /dev/null +++ b/Drivers/CMSIS/DSP/Source/FastMathFunctions/arm_vlog_q31.c @@ -0,0 +1,258 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_vlog_q31 + * Description: Q31 vector log + * + * $Date: 19 July 2021 + * $Revision: V1.10.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dsp/fast_math_functions.h" + +#define LOG_Q31_ACCURACY 31 + +/* Bit to represent the normalization factor + It is Ceiling[Log2[LOG_Q31_ACCURACY]] of the previous value. + The Log2 algorithm is assuming that the value x is + 1 <= x < 2. + + But input value could be as small a 2^-LOG_Q31_ACCURACY + which would give an integer part of -31. +*/ +#define LOG_Q31_INTEGER_PART 5 + +/* 2.0 in Q30 */ +#define LOQ_Q31_THRESHOLD (1u << LOG_Q31_ACCURACY) + +/* HALF */ +#define LOQ_Q31_Q32_HALF LOQ_Q31_THRESHOLD +#define LOQ_Q31_Q30_HALF (LOQ_Q31_Q32_HALF >> 2) + + +/* 1.0 / Log2[Exp[1]] in Q31 */ +#define LOG_Q31_INVLOG2EXP 0x58b90bfbuL + +/* Clay Turner algorithm */ +static uint32_t arm_scalar_log_q31(uint32_t src) +{ + int32_t i; + + int32_t c = __CLZ(src); + int32_t normalization=0; + + /* 0.5 in q26 */ + uint32_t inc = LOQ_Q31_Q32_HALF >> (LOG_Q31_INTEGER_PART + 1); + + /* Will compute y = log2(x) for 1 <= x < 2.0 */ + uint32_t x; + + /* q26 */ + uint32_t y=0; + + /* q26 */ + int32_t tmp; + + + /* Normalize and convert to q30 format */ + x = src; + if ((c-1) < 0) + { + x = x >> (1-c); + } + else + { + x = x << (c-1); + } + normalization = c; + + /* Compute the Log2. Result is in q26 + because we know 0 <= y < 1.0 but + do not want to use q32 to allow + following computation with less instructions. + */ + for(i = 0; i < LOG_Q31_ACCURACY ; i++) + { + x = ((int64_t)x*x) >> (LOG_Q31_ACCURACY - 1); + + if (x >= LOQ_Q31_THRESHOLD) + { + y += inc ; + x = x >> 1; + } + inc = inc >> 1; + } + + /* + Convert the Log2 to Log and apply normalization. + We compute (y - normalisation) * (1 / Log2[e]). + + */ + + /* q26 */ + tmp = (int32_t)y - (normalization << (LOG_Q31_ACCURACY - LOG_Q31_INTEGER_PART)); + + + /* q5.26 */ + y = ((int64_t)tmp * LOG_Q31_INVLOG2EXP) >> 31; + + + + return(y); + +} + +#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + + +q31x4_t vlogq_q31(q31x4_t src) +{ + + int32_t i; + + int32x4_t c = vclzq_s32(src); + int32x4_t normalization = c; + + + /* 0.5 in q11 */ + uint32_t inc = LOQ_Q31_Q32_HALF >> (LOG_Q31_INTEGER_PART + 1); + + /* Will compute y = log2(x) for 1 <= x < 2.0 */ + uint32x4_t x; + + + /* q11 */ + uint32x4_t y = vdupq_n_u32(0); + + + /* q11 */ + int32x4_t vtmp; + + + mve_pred16_t p; + + /* Normalize and convert to q14 format */ + + + vtmp = vsubq_n_s32(c,1); + x = vshlq_u32((uint32x4_t)src,vtmp); + + + /* Compute the Log2. Result is in Q26 + because we know 0 <= y < 1.0 but + do not want to use Q32 to allow + following computation with less instructions. + */ + for(i = 0; i < LOG_Q31_ACCURACY ; i++) + { + x = vmulhq_u32(x,x); + x = vshlq_n_u32(x,2); + + + p = vcmphiq_u32(x,vdupq_n_u32(LOQ_Q31_THRESHOLD)); + y = vaddq_m_n_u32(y, y,inc,p); + x = vshrq_m_n_u32(x,x,1,p); + + inc = inc >> 1; + } + + + /* + Convert the Log2 to Log and apply normalization. + We compute (y - normalisation) * (1 / Log2[e]). + + */ + + /* q11 */ + // tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART)); + vtmp = vshlq_n_s32(normalization,LOG_Q31_ACCURACY - LOG_Q31_INTEGER_PART); + vtmp = vsubq_s32((int32x4_t)y,vtmp); + + + + /* q4.11 */ + // y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15; + vtmp = vqdmulhq_n_s32(vtmp,LOG_Q31_INVLOG2EXP); + + return(vtmp); +} +#endif + +/** + @ingroup groupFastMath + */ + +/** + @addtogroup vlog + @{ + */ + +/** + @brief q31 vector of log values. + @param[in] pSrc points to the input vector in q31 + @param[out] pDst points to the output vector q5.26 + @param[in] blockSize number of samples in each vector + @return none + + */ +void arm_vlog_q31( + const q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; /* loop counters */ + + #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) + + q31x4_t src; + q31x4_t dst; + + blkCnt = blockSize >> 2; + + while (blkCnt > 0U) + { + src = vld1q(pSrc); + dst = vlogq_q31(src); + vst1q(pDst, dst); + + pSrc += 4; + pDst += 4; + /* Decrement loop counter */ + blkCnt--; + } + + blkCnt = blockSize & 3; + #else + blkCnt = blockSize; + #endif + + while (blkCnt > 0U) + { + *pDst++=arm_scalar_log_q31(*pSrc++); + + blkCnt--; + } + +} + +/** + @} end of vlog group + */ |