/* ---------------------------------------------------------------------- * Project: CMSIS DSP Library * Title: arm_cmplx_mag_fast_q15.c * Description: Q15 complex magnitude * * $Date: 23 April 2021 * $Revision: V1.9.0 * * Target Processor: Cortex-M and Cortex-A cores * -------------------------------------------------------------------- */ /* * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the License); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an AS IS BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "dsp/complex_math_functions.h" /** @ingroup groupCmplxMath */ /** @addtogroup cmplx_mag @{ */ /** @brief Q15 complex magnitude. @param[in] pSrc points to input vector @param[out] pDst points to output vector @param[in] numSamples number of samples in each vector @return none @par Scaling and Overflow Behavior The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format. Fast functions are less accurate. This function will tend to clamp to 0 the too small values. So sqrt(x*x) = x will not always be true. */ #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) #include "arm_helium_utils.h" void arm_cmplx_mag_fast_q15( const q15_t * pSrc, q15_t * pDst, uint32_t numSamples) { int32_t blockSize = numSamples; /* loop counters */ uint32_t blkCnt; /* loop counters */ q15x8x2_t vecSrc; q15x8_t sum; q31_t in; q31_t acc0; blkCnt = blockSize >> 3; while (blkCnt > 0U) { vecSrc = vld2q(pSrc); pSrc += 16; sum = vqaddq(vmulhq(vecSrc.val[0], vecSrc.val[0]), vmulhq(vecSrc.val[1], vecSrc.val[1])); sum = vshrq(sum, 1); sum = FAST_VSQRT_Q15(sum); vst1q(pDst, sum); pDst += 8; /* * Decrement the blockSize loop counter */ blkCnt--; } /* * tail */ blkCnt = blockSize & 7; while (blkCnt > 0U) { /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ in = read_q15x2_ia ((q15_t **) &pSrc); acc0 = __SMUAD(in, in); /* store result in 2.14 format in destination buffer. */ arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); /* Decrement loop counter */ blkCnt--; } } #else void arm_cmplx_mag_fast_q15( const q15_t * pSrc, q15_t * pDst, uint32_t numSamples) { uint32_t blkCnt; /* Loop counter */ #if defined (ARM_MATH_DSP) q31_t in; q31_t acc0; /* Accumulators */ #else q15_t real, imag; /* Temporary input variables */ q31_t acc0, acc1; /* Accumulators */ #endif #if defined (ARM_MATH_LOOPUNROLL) /* Loop unrolling: Compute 4 outputs at a time */ blkCnt = numSamples >> 2U; while (blkCnt > 0U) { /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ #if defined (ARM_MATH_DSP) in = read_q15x2_ia (&pSrc); acc0 = __SMUAD(in, in); /* store result in 2.14 format in destination buffer. */ arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); in = read_q15x2_ia (&pSrc); acc0 = __SMUAD(in, in); arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); in = read_q15x2_ia (&pSrc); acc0 = __SMUAD(in, in); arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); in = read_q15x2_ia (&pSrc); acc0 = __SMUAD(in, in); arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); #else real = *pSrc++; imag = *pSrc++; acc0 = ((q31_t) real * real); acc1 = ((q31_t) imag * imag); /* store result in 2.14 format in destination buffer. */ arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); real = *pSrc++; imag = *pSrc++; acc0 = ((q31_t) real * real); acc1 = ((q31_t) imag * imag); arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); real = *pSrc++; imag = *pSrc++; acc0 = ((q31_t) real * real); acc1 = ((q31_t) imag * imag); arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); real = *pSrc++; imag = *pSrc++; acc0 = ((q31_t) real * real); acc1 = ((q31_t) imag * imag); arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); #endif /* #if defined (ARM_MATH_DSP) */ /* Decrement loop counter */ blkCnt--; } /* Loop unrolling: Compute remaining outputs */ blkCnt = numSamples % 0x4U; #else /* Initialize blkCnt with number of samples */ blkCnt = numSamples; #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ while (blkCnt > 0U) { /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ #if defined (ARM_MATH_DSP) in = read_q15x2_ia (&pSrc); acc0 = __SMUAD(in, in); /* store result in 2.14 format in destination buffer. */ arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); #else real = *pSrc++; imag = *pSrc++; acc0 = ((q31_t) real * real); acc1 = ((q31_t) imag * imag); /* store result in 2.14 format in destination buffer. */ arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); #endif /* Decrement loop counter */ blkCnt--; } } #endif /* defined(ARM_MATH_MVEI) */ /** @} end of cmplx_mag group */