Diffstat (limited to 'Drivers/CMSIS/NN/Source/SoftmaxFunctions')
9 files changed, 957 insertions, 0 deletions
diff --git a/Drivers/CMSIS/NN/Source/SoftmaxFunctions/CMakeLists.txt b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/CMakeLists.txt
new file mode 100644
index 0000000..622b990
--- /dev/null
+++ b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/CMakeLists.txt
@@ -0,0 +1,22 @@
+#
+# Copyright (c) 2019-2022 Arm Limited.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+file(GLOB SRC "./*_s8.c")
+target_sources(cmsis-nn PRIVATE ${SRC} arm_softmax_s8_s16.c
+                                arm_softmax_s16.c
+                                arm_nn_softmax_common_s8.c)
diff --git a/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c
new file mode 100644
index 0000000..84d1ac8
--- /dev/null
+++ b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_softmax_common_s8.c
+ * Description:  Softmax with s8 input and output of s8 or s16.
+ *
+ * $Date:        17 March 2022
+ * $Revision:    V.1.0.1
+ *
+ * Target Processor:  Cortex-M processors
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnsupportfunctions.h"
+
+#define ACCUM_BITS 12
+
+/**
+ * @ingroup groupSupport
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+/*
+ * Softmax function with s8 input and output of s8 or s16.
+ *
+ * Refer to header file for details.
+ *
+ */
+void arm_nn_softmax_common_s8(const int8_t *input,
+                              const int32_t num_rows,
+                              const int32_t row_size,
+                              const int32_t mult,
+                              const int32_t shift,
+                              const int32_t diff_min,
+                              const bool int16_output,
+                              void *output)
+{
+    const int32_t mask = (1 << shift);
+
+    int32_t col = 0;
+    int32_t row_idx;
+
+    for (row_idx = 0; row_idx < num_rows; ++row_idx)
+    {
+        // Find the maximum value in order to ensure numerical stability
+        int8_t max = *input;
+
+        for (col = 1; col < row_size; ++col)
+        {
+            max = MAX(max, input[col]);
+        }
+
+        int32_t diff = 0;
+        int32_t sum = 0;
+
+        for (col = 0; col < row_size; ++col)
+        {
+            diff = input[col] - max;
+            if (diff >= diff_min)
+            {
+                sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS);
+            }
+        }
+
+        const int32_t headroom = __CLZ(sum);
+        const int32_t shifted_scale = ONE_OVER1((sum > 0 ? sum << headroom : 0) - (1 << 31));
+        int32_t bits_over_unit;
+
+        if (int16_output)
+        {
+            int16_t *output_s16 = (int16_t *)output + row_idx * row_size;
+
+            bits_over_unit = ACCUM_BITS - headroom + 15;
+
+            for (col = 0; col < row_size; ++col)
+            {
+                diff = input[col] - max;
+
+                if (diff >= diff_min)
+                {
+                    const int32_t res =
+                        DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) +
+                        NN_Q15_MIN;
+                    output_s16[col] = (int16_t)CLAMP(res, (int32_t)NN_Q15_MAX, (int32_t)NN_Q15_MIN);
+                }
+                else
+                {
+                    output_s16[col] = NN_Q15_MIN;
+                }
+            }
+        }
+        else
+        {
+            int8_t *output_s8 = (int8_t *)output + row_idx * row_size;
+
+            bits_over_unit = ACCUM_BITS - headroom + 23;
+
+            for (col = 0; col < row_size; ++col)
+            {
+                diff = input[col] - max;
+                if (diff >= diff_min)
+                {
+                    const int32_t res =
+                        DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) +
+                        NN_Q7_MIN;
+                    output_s8[col] = (int8_t)CLAMP(res, (int32_t)NN_Q7_MAX, (int32_t)NN_Q7_MIN);
+                }
+                else
+                {
+                    output_s8[col] = NN_Q7_MIN;
+                }
+            }
+        }
+
+        input += row_size;
+    }
+}
+
+/**
+ * @} end of Softmax group
+ */
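A minimal usage sketch (not part of the diff): driving the common kernel directly with one row of logits. The function name and the quantization parameters (mult, shift, diff_min) below are illustrative placeholders; in practice they are derived offline from the layer's input scale and beta, and the kernel is normally reached through the arm_softmax_s8/arm_softmax_s8_s16 wrappers further down.

#include <stdbool.h>
#include <stdint.h>
#include "arm_nnsupportfunctions.h"

void softmax_common_example(void) /* hypothetical helper name */
{
    const int8_t logits[8] = {-60, -20, 0, 35, 90, 100, 110, 127}; /* one row */
    int8_t out_s8[8];
    int16_t out_s16[8];

    /* Placeholder quantization parameters, not computed from a real scale. */
    const int32_t mult = 1073741824; /* ~0.5 in Q31 */
    const int32_t shift = 2;         /* kernel uses mask = 1 << shift */
    const int32_t diff_min = -1020;  /* diffs below this are treated as exp() == 0 */

    /* Same kernel, two output types, selected by the bool flag. */
    arm_nn_softmax_common_s8(logits, 1, 8, mult, shift, diff_min, false, out_s8);
    arm_nn_softmax_common_s8(logits, 1, 8, mult, shift, diff_min, true, out_s16);
}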
diff --git a/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
new file mode 100644
index 0000000..18f3e83
--- /dev/null
+++ b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_q15.c
+ * Description:  Q15 softmax function
+ *
+ * $Date:        09. October 2020
+ * $Revision:    V.1.0.1
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+
+/**
+ * @ingroup groupNN
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+/**
+ * @brief Q15 softmax function
+ * @param[in]  vec_in   pointer to input vector
+ * @param[in]  dim_vec  input vector dimension
+ * @param[out] p_out    pointer to output vector
+ *
+ * @details
+ *
+ * Here, instead of the typical e-based softmax, we use
+ * a 2-based softmax, i.e.:
+ *
+ *  y_i = 2^(x_i) / sum(2^x_j)
+ *
+ * The relative output will be different here.
+ * But mathematically, the gradient will be the same
+ * with a log(2) scaling factor.
+ *
+ */
+
+void arm_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, q15_t *p_out)
+{
+    q31_t sum;
+    int16_t i;
+    uint8_t shift;
+    q31_t base;
+    base = -1 * 0x100000;
+    for (i = 0; i < dim_vec; i++)
+    {
+        if (vec_in[i] > base)
+        {
+            base = vec_in[i];
+        }
+    }
+
+    /* We ignore really small values;
+     * anyway, they will be 0 after shrinking
+     * to q15_t.
+     */
+    base = base - 16;
+
+    sum = 0;
+
+    for (i = 0; i < dim_vec; i++)
+    {
+        if (vec_in[i] > base)
+        {
+            shift = (uint8_t)__USAT(vec_in[i] - base, 5);
+            sum += 0x1 << shift;
+        }
+    }
+
+    /* This is effectively (0x1 << 32) / sum */
+    int64_t div_base = 0x100000000LL;
+    int output_base = (int32_t)(div_base / sum);
+
+    /* Final confidence will be output_base >> ( 17 - (vec_in[i] - base) )
+     * so 32768 (0x1<<15) -> 100% confidence when sum = 0x1 << 16, output_base = 0x1 << 16
+     * and vec_in[i]-base = 16
+     */
+    for (i = 0; i < dim_vec; i++)
+    {
+        if (vec_in[i] > base)
+        {
+            /* Here minimum value of 17+base-vec_in[i] will be 1 */
+            shift = (uint8_t)__USAT(17 + base - vec_in[i], 5);
+            p_out[i] = (q15_t)__SSAT((output_base >> shift), 16);
+        }
+        else
+        {
+            p_out[i] = 0;
+        }
+    }
+}
+
+/**
+ * @} end of Softmax group
+ */
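A minimal usage sketch (not part of the diff): the q15 variant needs no quantization parameters, only the vector and its length. The function name and the logit values are illustrative; only inputs within 16 units of the maximum contribute to the result.

#include "arm_nnfunctions.h"

void softmax_q15_example(void) /* hypothetical helper name */
{
    const q15_t logits[4] = {-8, 0, 4, 8};
    q15_t probs[4];

    arm_softmax_q15(logits, 4, probs);
    /* probs[] now holds 2-based softmax confidences in q15: the largest
     * logit gets by far the largest output, and values more than 16 below
     * the maximum are forced to 0. */
}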
diff --git a/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
new file mode 100644
index 0000000..58eb990
--- /dev/null
+++ b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_q7.c
+ * Description:  Q7 softmax function
+ *
+ * $Date:        09. October 2020
+ * $Revision:    V.1.0.2
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+
+/**
+ * @ingroup groupNN
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+/**
+ * @brief Q7 softmax function
+ * @param[in]  vec_in   pointer to input vector
+ * @param[in]  dim_vec  input vector dimension
+ * @param[out] p_out    pointer to output vector
+ *
+ * @details
+ *
+ * Here, instead of the typical natural-logarithm (e-based) softmax, we use
+ * a 2-based softmax, i.e.:
+ *
+ *  y_i = 2^(x_i) / sum(2^x_j)
+ *
+ * The relative output will be different here.
+ * But mathematically, the gradient will be the same
+ * with a log(2) scaling factor.
+ *
+ */
+
+void arm_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out)
+{
+    q31_t sum;
+    int16_t i;
+    uint8_t shift;
+    q15_t base;
+    base = -128;
+
+    /* We first search for the maximum */
+    for (i = 0; i < dim_vec; i++)
+    {
+        if (vec_in[i] > base)
+        {
+            base = vec_in[i];
+        }
+    }
+
+    /*
+     * The base is set to max-8, meaning that we ignore really small values;
+     * anyway, they will be 0 after shrinking to q7_t.
+     */
+    base = base - (1 << 3);
+
+    sum = 0;
+
+    for (i = 0; i < dim_vec; i++)
+    {
+        shift = (uint8_t)__USAT(vec_in[i] - base, 3);
+        sum += 0x1 << shift;
+    }
+
+    /* This is effectively (0x1 << 20) / sum */
+    int output_base = (1 << 20) / sum;
+
+    for (i = 0; i < dim_vec; i++)
+    {
+
+        /* Here minimum value of 13+base-vec_in[i] will be 5 */
+        shift = (uint8_t)__USAT(13 + base - vec_in[i], 5);
+        p_out[i] = (q7_t)__SSAT((output_base >> shift), 8);
+    }
+}
+
+/**
+ * @} end of Softmax group
+ */
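A minimal usage sketch (not part of the diff): q7 softmax over a single vector. The function name and logit values are illustrative.

#include "arm_nnfunctions.h"

void softmax_q7_example(void) /* hypothetical helper name */
{
    const q7_t logits[4] = {-4, 0, 2, 5};
    q7_t probs[4];

    arm_softmax_q7(logits, 4, probs);
    /* Outputs are 2-based softmax confidences in q7 (0..127); the ranking of
     * the inputs is preserved even though the values differ from an e-based
     * softmax. */
}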
diff --git a/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s16.c b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s16.c
new file mode 100644
index 0000000..e840893
--- /dev/null
+++ b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s16.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_s16.c
+ * Description:  S16 softmax function
+ *
+ * $Date:        9 March 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+arm_status arm_softmax_s16(const int16_t *input,
+                           const int32_t num_rows,
+                           const int32_t row_size,
+                           const int32_t mult,
+                           const int32_t shift,
+                           const cmsis_nn_softmax_lut_s16 *softmax_params,
+                           int16_t *output)
+{
+    int32_t col = 0;
+    int32_t row_idx;
+
+    if (softmax_params->exp_lut == NULL || softmax_params->one_by_one_lut == NULL)
+    {
+        return ARM_MATH_ARGUMENT_ERROR;
+    }
+
+    for (row_idx = 0; row_idx < num_rows; ++row_idx)
+    {
+        // Find the maximum value in order to ensure numerical stability
+        int16_t max = *input;
+        for (col = 1; col < row_size; ++col)
+        {
+            max = MAX(max, input[col]);
+        }
+
+        int32_t diff = 0;
+        int32_t sum = 0;
+        int16_t *cached_exp_results = output;
+
+        for (col = 0; col < row_size; ++col)
+        {
+            diff = input[col] - max;
+            const int32_t scaled_diff = arm_nn_requantize(diff, mult, shift);
+            const int32_t symmetric_scaled_diff = scaled_diff + NN_Q15_MAX;
+            const int16_t saturated_symmetric_scaled_diff = MIN(MAX(symmetric_scaled_diff, NN_Q15_MIN), NN_Q15_MAX);
+
+            // Lookup from exp table and cache result for next step
+            const int16_t index = (256 + (saturated_symmetric_scaled_diff >> 7));
+            const int16_t offset = saturated_symmetric_scaled_diff & 0x7f;
+            const int16_t base = softmax_params->exp_lut[index];
+            const int16_t slope = softmax_params->exp_lut[index + 1] - softmax_params->exp_lut[index];
+            const int16_t delta = (slope * offset + 64) >> 7;
+            const int16_t result = (base + delta);
+            cached_exp_results[col] = result;
+
+            sum += cached_exp_results[col];
+        }
+
+        const int32_t headroom = __CLZ(sum);
+
+        // Compute the reciprocal 1/sum
+        const int32_t shifted_sum = (((sum) << (headroom - 1)) + (1 << 13)) >> 14;
+
+        // Since LUT computes 1/(1 + x), compute x = (sum - 1) => -65536
+        // Since LUT expects a symmetrical input, recenter from [UINT16_MIN, UINT16_MAX] to [INT16_MIN, INT16_MAX] =>
+        // -32768 ==> So in total -65536 -32768 => -98304
+        const int16_t symmetric_shifted_sum = shifted_sum - 98304;
+
+        // Lookup from one by one table
+        const int16_t index = (256 + (symmetric_shifted_sum >> 7));
+        const int16_t offset = symmetric_shifted_sum & 0x7f;
+        const int16_t base = softmax_params->one_by_one_lut[index];
+        const int16_t slope = softmax_params->one_by_one_lut[index + 1] - softmax_params->one_by_one_lut[index];
+        const int16_t delta = (slope * offset + 64) >> 7;
+        const int16_t one_by_one_result = (base + delta);
+
+        for (col = 0; col < row_size; ++col)
+        {
+            const int16_t right_shift = 30 - headroom;
+            int32_t result = (cached_exp_results[col] * one_by_one_result) >> right_shift;
+            result = (result + 1) >> 1; // Last shift position and insert round
+            output[col] = (int16_t)result;
+        }
+
+        output += row_size;
+        input += row_size;
+    }
+
+    return ARM_MATH_SUCCESS;
+}
+
+/**
+ * @} end of Softmax group
+ */
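A hedged setup sketch (not part of the diff): the s16 kernel interpolates between neighbouring LUT entries, and the index arithmetic (256 + (x >> 7), plus an index + 1 access over the full int16 range) implies 513-entry int16 tables. The tables are expected to be generated offline by the calling framework; the array names, the helper name, and the requantization parameters below are placeholders, not real data.

#include "arm_nnfunctions.h"

extern int16_t softmax_exp_lut[513];        /* exp() LUT, generated offline (assumed) */
extern int16_t softmax_one_by_one_lut[513]; /* 1/(1+x) LUT, generated offline (assumed) */

arm_status softmax_s16_example(const int16_t *logits, int16_t *probs)
{
    const cmsis_nn_softmax_lut_s16 params = {
        .exp_lut = softmax_exp_lut,
        .one_by_one_lut = softmax_one_by_one_lut,
    };

    /* One row of 10 logits; mult/shift are illustrative requantization values. */
    return arm_softmax_s16(logits, 1, 10, 1073741824, 0, &params, probs);
}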
diff --git a/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c
new file mode 100644
index 0000000..fd70597
--- /dev/null
+++ b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2010-2022 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_s8.c
+ * Description:  S8 softmax function
+ *
+ * $Date:        9 March 2022
+ * $Revision:    V.2.1.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+#define ACCUM_BITS 12
+
+#ifdef ARM_MATH_MVEI
+static int32x4_t arm_exp_on_negative_values_mve_32x4(int32x4_t val)
+{
+#define SHIFT_START (24)
+    int32_t shift = SHIFT_START;
+    int32x4_t mask;
+
+    const int32x4_t val_mod_minus_quarter =
+        vandq_s32(val, vdupq_n_s32((1 << SHIFT_START) - 1)) - vdupq_n_s32(1 << SHIFT_START);
+    const int32x4_t remainder = vsubq_s32(val_mod_minus_quarter, val);
+    const int32x4_t x = vaddq_n_s32(val_mod_minus_quarter << 5, 1 << 28);
+    const int32x4_t x2 = MUL_SAT_MVE(x, x);
+    const int32x4_t op_1 = DIV_POW2_MVE(MUL_SAT_MVE(x2, x2), 2) + MUL_SAT_MVE(x2, x);
+    const int32x4_t op_2 = x + DIV_POW2_MVE(MUL_SAT_MVE(op_1, vdupq_n_s32(715827883)) + x2, 1);
+    int32x4_t result = vdupq_n_s32(1895147668) + MUL_SAT_MVE(vdupq_n_s32(1895147668), op_2);
+
+#define SELECT_IF_NON_ZERO(x)                                                             \
+    {                                                                                     \
+        mve_pred16_t p = vcmpneq_n_s32(remainder & vdupq_n_s32(1 << shift++), 0);         \
+        mask = vmvnq_m_s32(vdupq_n_s32(0), vdupq_n_s32(0), p);                            \
+        result = SELECT_USING_MASK(mask, MUL_SAT_MVE(result, vdupq_n_s32(x)), result);    \
+    }
+
+    SELECT_IF_NON_ZERO(1672461947)
+    SELECT_IF_NON_ZERO(1302514674)
+    SELECT_IF_NON_ZERO(790015084)
+    SELECT_IF_NON_ZERO(290630308)
+    SELECT_IF_NON_ZERO(39332535)
+    SELECT_IF_NON_ZERO(720401)
+    SELECT_IF_NON_ZERO(242)
+
+#undef SELECT_IF_NON_ZERO
+
+    mve_pred16_t p = vcmpeqq_n_s32(val, 0);
+    mask = vmvnq_m_s32(vdupq_n_s32(0), vdupq_n_s32(0), p);
+
+    result = SELECT_USING_MASK(mask, vdupq_n_s32(NN_Q31_MAX), result);
+    return result;
+}
+#endif
+
+/**
+ * @ingroup groupNN
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+void arm_softmax_s8(const int8_t *input,
+                    const int32_t num_rows,
+                    const int32_t row_size,
+                    const int32_t mult,
+                    const int32_t shift,
+                    const int32_t diff_min,
+                    int8_t *output)
+{
+#ifdef ARM_MATH_MVEI
+
+#define ACT_MIN ((int8_t)NN_Q7_MIN)
+#define ACT_MAX ((int8_t)NN_Q7_MAX)
+
+    const int32_t mask = (1 << shift);
+
+    for (int i_num_rows = 0; i_num_rows < num_rows; ++i_num_rows)
+    {
+        int8_t max = ACT_MIN;
+
+        int32_t vec_count = (row_size + 15) / 16;
+        uint32_t r_count = (uint32_t)row_size;
+        for (int i = 0; i < vec_count; i++)
+        {
+            mve_pred16_t p = vctp8q(r_count);
+            const int8x16_t ip = vldrbq_z_s8(&input[i * 16], p);
+            max = vmaxvq_p_s8(max, ip, p);
+            r_count -= 16;
+        }
+
+        vec_count = row_size / 4;
+        int32_t idx = 0;
+        int32_t sum = 0;
+
+        while (vec_count)
+        {
+            int32x4_t ip = vldrbq_s32(&input[idx * 4]);
+            ip = vsubq_n_s32(ip, max);
+            mve_pred16_t p = vcmpgeq_n_s32(ip, diff_min);
+            if (p != 0)
+            {
+                ip = vmulq_n_s32(ip, mask);
+
+                int32x4_t res = MUL_SAT_MVE(ip, vdupq_n_s32(mult));
+
+                res = arm_exp_on_negative_values_mve_32x4(res);
+                res = DIV_POW2_MVE(res, ACCUM_BITS);
+                res = vpselq_s32(res, vdupq_n_s32(0), p);
+                sum += vaddvq_s32(res);
+            }
+
+            vec_count--;
+            idx++;
+        }
+
+        const int32_t tail_idx = row_size & ~3;
+        for (int i = 0; i < (row_size & 3); i++)
+        {
+            const int32_t diff = input[tail_idx + i] - max;
+            if (diff >= diff_min)
+            {
+                sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS);
+            }
+        }
+
+        const int32_t headroom = __CLZ((uint32_t)sum);
+        const int32_t bits_over_unit = ACCUM_BITS - headroom + 23;
+        const int32_t shifted_scale = ONE_OVER1((sum > 0 ? sum << headroom : 0) - (1 << 31));
+
+        vec_count = row_size / 4;
+        idx = 0;
+
+        while (vec_count)
+        {
+            int32x4_t ip = vldrbq_s32(&input[idx]);
+            ip = vsubq_n_s32(ip, max);
+
+            mve_pred16_t p = vcmpgeq_n_s32(ip, diff_min);
+
+            int32x4_t tmp_res;
+
+            if (p != 0)
+            {
+                ip = vmulq_n_s32(ip, mask);
+
+                tmp_res = MUL_SAT_MVE(ip, vdupq_n_s32(mult));
+                tmp_res = arm_exp_on_negative_values_mve_32x4(tmp_res);
+                tmp_res = MUL_SAT_MVE(vdupq_n_s32(shifted_scale), tmp_res);
+                tmp_res = DIV_POW2_MVE(tmp_res, bits_over_unit);
+                tmp_res += vdupq_n_s32(ACT_MIN);
+
+                tmp_res = vmaxq_s32(tmp_res, vdupq_n_s32(ACT_MIN));
+                tmp_res = vminq_s32(tmp_res, vdupq_n_s32(ACT_MAX));
+                tmp_res = vpselq_s32(tmp_res, vdupq_n_s32(ACT_MIN), p);
+            }
+            else
+            {
+                tmp_res = vdupq_n_s32(ACT_MIN);
+            }
+            vstrbq_s32(&output[idx], tmp_res);
+            vec_count--;
+            idx += 4;
+        }
+
+        for (int i = 0; i < (row_size & 3); i++)
+        {
+            int32_t diff = input[tail_idx + i] - max;
+            if (diff >= diff_min)
+            {
+                const int32_t res =
+                    DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) +
+                    NN_Q7_MIN;
+                output[tail_idx + i] = (int8_t)CLAMP(res, (int32_t)ACT_MAX, (int32_t)ACT_MIN);
+            }
+            else
+            {
+                output[tail_idx + i] = ACT_MIN;
+            }
+        }
+
+        input += row_size;
+        output += row_size;
+    }
+#else
+    arm_nn_softmax_common_s8(input, num_rows, row_size, mult, shift, diff_min, false, (void *)output);
+#endif
+}
+
+/**
+ * @} end of Softmax group
+ */
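A minimal usage sketch (not part of the diff): the same entry point is used on all cores; when ARM_MATH_MVEI is defined it takes the Helium path above, otherwise it falls back to arm_nn_softmax_common_s8. The helper name, logit values, and the mult/shift/diff_min parameters are placeholders; real values come from the framework's softmax scaling.

#include "arm_nnfunctions.h"

void softmax_s8_example(void) /* hypothetical helper name */
{
    const int8_t logits[2 * 5] = {
        /* row 0 */ -128, -60, 0, 60, 127,
        /* row 1 */ 10, 10, 10, 10, 10,
    };
    int8_t probs[2 * 5];

    /* Two rows of five classes; quantization parameters are illustrative only. */
    arm_softmax_s8(logits, 2, 5, 1073741824, 2, -1020, probs);
}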
diff --git a/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8_s16.c b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8_s16.c
new file mode 100644
index 0000000..9ba0b9a
--- /dev/null
+++ b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8_s16.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2022 Arm Limited or its affiliates.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_s8_s16.c
+ * Description:  S8 to s16 softmax function
+ *
+ * $Date:        7 January 2022
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+/**
+ * @ingroup groupNN
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+void arm_softmax_s8_s16(const int8_t *input,
+                        const int32_t num_rows,
+                        const int32_t row_size,
+                        const int32_t mult,
+                        const int32_t shift,
+                        const int32_t diff_min,
+                        int16_t *output)
+{
+    arm_nn_softmax_common_s8(input, num_rows, row_size, mult, shift, diff_min, true, (void *)output);
+}
+/**
+ * @} end of Softmax group
+ */
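A minimal usage sketch (not part of the diff): identical parameters to arm_softmax_s8, but the probabilities are written as int16_t. The helper name and parameter values are placeholders.

#include "arm_nnfunctions.h"

void softmax_s8_s16_example(const int8_t *logits, int16_t *probs)
{
    /* One row of 10 classes; quantization parameters are illustrative only. */
    arm_softmax_s8_s16(logits, 1, 10, 1073741824, 2, -1275, probs);
}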
diff --git a/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c
new file mode 100644
index 0000000..c4df8f8
--- /dev/null
+++ b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_u8.c
+ * Description:  U8 softmax function
+ *
+ * $Date:        09. October 2020
+ * $Revision:    V.1.0.2
+ *
+ * Target Processor:  Cortex-M CPUs
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_nnsupportfunctions.h"
+
+#define ACCUM_BITS 12
+
+/**
+ * @ingroup groupNN
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+void arm_softmax_u8(const uint8_t *input,
+                    const int32_t num_rows,
+                    const int32_t row_size,
+                    const int32_t mult,
+                    const int32_t shift,
+                    const int32_t diff_min,
+                    uint8_t *output)
+{
+    const int32_t mask = (1 << shift);
+
+    int32_t col = 0;
+    int32_t row_idx;
+
+    for (row_idx = 0; row_idx < num_rows; ++row_idx)
+    {
+        // Find the maximum value in order to ensure numerical stability
+        uint8_t max = *input;
+
+        for (col = 1; col < row_size; ++col)
+        {
+            max = MAX(max, input[col]);
+        }
+
+        int32_t diff = 0;
+        int32_t sum = 0;
+
+        for (col = 0; col < row_size; ++col)
+        {
+            diff = input[col] - max;
+            if (diff >= diff_min)
+            {
+                sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS);
+            }
+        }
+
+        const int32_t headroom = __CLZ((uint32_t)sum);
+        const int32_t bits_over_unit = ACCUM_BITS - headroom + 23;
+        const int32_t shifted_scale = ONE_OVER1((sum << headroom) - (1 << 31));
+
+        for (col = 0; col < row_size; ++col)
+        {
+            diff = input[col] - max;
+            if (diff >= diff_min)
+            {
+                const int32_t res =
+                    DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit);
+                output[col] = (uint8_t)CLAMP(res, (int32_t)255, (int32_t)0);
+            }
+            else
+            {
+                output[col] = 0;
+            }
+        }
+        input += row_size;
+        output += row_size;
+    }
+}
+/**
+ * @} end of Softmax group
+ */
\ No newline at end of file
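A minimal usage sketch (not part of the diff): the u8 variant takes the same quantization parameters as the s8 one but reads and writes unsigned values in 0..255. The helper name and parameter values are placeholders.

#include "arm_nnfunctions.h"

void softmax_u8_example(const uint8_t *logits, uint8_t *probs)
{
    /* One row of 10 classes; mult/shift/diff_min are illustrative only. */
    arm_softmax_u8(logits, 1, 10, 1073741824, 2, -1020, probs);
}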
diff --git a/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c
new file mode 100644
index 0000000..66e892e
--- /dev/null
+++ b/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_softmax_with_batch_q7.c
+ * Description:  Q7 softmax function
+ *
+ * $Date:        09. October 2020
+ * $Revision:    V.1.0.1
+ *
+ * Target Processor:  Cortex-M and Cortex-A cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+
+/**
+ * @ingroup groupNN
+ */
+
+/**
+ * @addtogroup Softmax
+ * @{
+ */
+
+/**
+ * @brief Q7 softmax function with batch parameter
+ * @param[in]  vec_in      pointer to input vector
+ * @param[in]  nb_batches  number of batches
+ * @param[in]  dim_vec     input vector dimension
+ * @param[out] p_out       pointer to output vector
+ *
+ * @details
+ *
+ * Here, instead of the typical natural-logarithm (e-based) softmax, we use
+ * a 2-based softmax, i.e.:
+ *
+ *  y_i = 2^(x_i) / sum(2^x_j)
+ *
+ * The relative output will be different here.
+ * But mathematically, the gradient will be the same
+ * with a log(2) scaling factor.
+ *
+ */
+
+void arm_softmax_with_batch_q7(const q7_t *vec_in, const uint16_t nb_batches, const uint16_t dim_vec, q7_t *p_out)
+{
+    for (int i = 0; i < nb_batches; i++)
+    {
+        arm_softmax_q7(vec_in, dim_vec, p_out);
+        vec_in += dim_vec;
+        p_out += dim_vec;
+    }
+}
+
+/**
+ * @} end of Softmax group
+ */
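A minimal usage sketch (not part of the diff): the batch wrapper simply calls arm_softmax_q7 once per row of the input. The helper name and logit values are illustrative.

#include "arm_nnfunctions.h"

void softmax_batch_q7_example(void) /* hypothetical helper name */
{
    const q7_t logits[3 * 4] = {
        -4, 0, 2, 5,
         1, 1, 1, 1,
         5, 2, 0, -4,
    };
    q7_t probs[3 * 4];

    /* Three independent rows of four classes each. */
    arm_softmax_with_batch_q7(logits, 3, 4, probs);
}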