diff options
Diffstat (limited to 'Drivers/CMSIS/DSP/Source/TransformFunctions')
35 files changed, 12257 insertions, 14972 deletions
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/CMakeLists.txt b/Drivers/CMSIS/DSP/Source/TransformFunctions/CMakeLists.txt index e598f82..834409e 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/CMakeLists.txt +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/CMakeLists.txt @@ -1,213 +1,116 @@ -cmake_minimum_required (VERSION 3.14) - -project(CMSISDSPTransform) - -include(configLib) -include(configDsp) - -add_library(CMSISDSPTransform STATIC) -configLib(CMSISDSPTransform ${ROOT}) -configDsp(CMSISDSPTransform ${ROOT}) - -include(fft) -fft(CMSISDSPTransform) - -if (CONFIGTABLE AND ALLFFT) -target_compile_definitions(CMSISDSPTransform PUBLIC ARM_ALL_FFT_TABLES) -endif() - -target_sources(CMSISDSPTransform PRIVATE arm_bitreversal.c) -target_sources(CMSISDSPTransform PRIVATE arm_bitreversal2.c) - -if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16)) -target_sources(CMSISDSPTransform PRIVATE arm_bitreversal_f16.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR CFFT_F32_16 OR CFFT_F32_32 OR CFFT_F32_64 OR CFFT_F32_128 OR CFFT_F32_256 OR CFFT_F32_512 - OR CFFT_F32_1024 OR CFFT_F32_2048 OR CFFT_F32_4096) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix8_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_f32.c) -endif() - -if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16)) -if (NOT CONFIGTABLE OR ALLFFT OR CFFT_F16_16 OR CFFT_F16_32 OR CFFT_F16_64 OR CFFT_F16_128 OR CFFT_F16_256 OR CFFT_F16_512 - OR CFFT_F16_1024 OR CFFT_F16_2048 OR CFFT_F16_4096) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_f16.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f16.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_f16.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_f16.c) -endif() -endif() - -if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16)) -if (NOT CONFIGTABLE OR ALLFFT OR RFFT_F16_128 OR RFFT_F16_512 OR RFFT_F16_2048 OR RFFT_F16_8192) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_f16.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f16.c) -endif() -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR CFFT_F64_16 OR CFFT_F64_32 OR CFFT_F64_64 OR CFFT_F64_128 OR CFFT_F64_256 OR CFFT_F64_512 - OR CFFT_F64_1024 OR CFFT_F64_2048 OR CFFT_F64_4096) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_f64.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_f64.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q15_16 OR CFFT_Q15_32 OR CFFT_Q15_64 OR CFFT_Q15_128 OR CFFT_Q15_256 OR CFFT_Q15_512 - OR CFFT_Q15_1024 OR CFFT_Q15_2048 OR CFFT_Q15_4096) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q15.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q31_16 OR CFFT_Q31_32 OR CFFT_Q31_64 OR CFFT_Q31_128 OR CFFT_Q31_256 OR CFFT_Q31_512 - OR CFFT_Q31_1024 OR CFFT_Q31_2048 OR CFFT_Q31_4096) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q31.c) -endif() - - -if (NOT CONFIGTABLE OR ALLFFT OR DCT4_F32_128 OR DCT4_F32_512 OR DCT4_F32_2048 OR DCT4_F32_8192) -target_sources(CMSISDSPTransform PRIVATE arm_dct4_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_dct4_init_f32.c) - -target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f32.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR DCT4_Q31_128 OR DCT4_Q31_512 OR DCT4_Q31_2048 OR DCT4_Q31_8192) -target_sources(CMSISDSPTransform PRIVATE arm_dct4_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_dct4_init_q31.c) - -target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q31.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR ALLFFT OR DCT4_Q15_128 OR DCT4_Q15_512 OR DCT4_Q15_2048 OR DCT4_Q15_8192) -target_sources(CMSISDSPTransform PRIVATE arm_dct4_init_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_dct4_q15.c) - -target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR RFFT_FAST_F32_32 OR RFFT_FAST_F32_64 OR RFFT_FAST_F32_128 - OR RFFT_FAST_F32_256 OR RFFT_FAST_F32_512 OR RFFT_FAST_F32_1024 OR RFFT_FAST_F32_2048 - OR RFFT_FAST_F32_4096 ) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_init_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix8_f32.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR RFFT_FAST_F64_32 OR RFFT_FAST_F64_64 OR RFFT_FAST_F64_128 - OR RFFT_FAST_F64_256 OR RFFT_FAST_F64_512 OR RFFT_FAST_F64_1024 OR RFFT_FAST_F64_2048 - OR RFFT_FAST_F64_4096 ) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_f64.c) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_init_f64.c) -endif() - -if ((NOT DISABLEFLOAT16)) -if (NOT CONFIGTABLE OR ALLFFT OR RFFT_FAST_F16_32 OR RFFT_FAST_F16_64 OR RFFT_FAST_F16_128 - OR RFFT_FAST_F16_256 OR RFFT_FAST_F16_512 OR RFFT_FAST_F16_1024 OR RFFT_FAST_F16_2048 - OR RFFT_FAST_F16_4096 ) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_f16.c) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_init_f16.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_f16.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_f16.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix8_f16.c) -endif() -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR RFFT_F32_128 OR RFFT_F32_512 OR RFFT_F32_2048 OR RFFT_F32_8192) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f32.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR RFFT_Q15_32 OR RFFT_Q15_64 OR RFFT_Q15_128 OR RFFT_Q15_256 - OR RFFT_Q15_512 OR RFFT_Q15_1024 OR RFFT_Q15_2048 OR RFFT_Q15_4096 OR RFFT_Q15_8192) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR RFFT_Q31_32 OR RFFT_Q31_64 OR RFFT_Q31_128 OR RFFT_Q31_256 - OR RFFT_Q31_512 OR RFFT_Q31_1024 OR RFFT_Q31_2048 OR RFFT_Q31_4096 OR RFFT_Q31_8192) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_rfft_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_init_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q31.c) -endif() - -if (WRAPPER OR ARM_CFFT_RADIX2_Q15) - target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_q15.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR ARM_CFFT_RADIX4_Q15) - target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q15.c) -endif() - -if (WRAPPER OR ARM_CFFT_RADIX2_Q31) - target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_q31.c) -endif() - -if (NOT CONFIGTABLE OR ALLFFT OR ARM_CFFT_RADIX4_Q31) - target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q31.c) -endif() - -# For scipy or wrappers or benchmarks -if (WRAPPER) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_f32.c) -if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16)) -target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_f16.c) -endif() - - target_compile_definitions(CMSISDSPTransform PUBLIC ARM_TABLE_BITREV_1024) - target_compile_definitions(CMSISDSPTransform PUBLIC ARM_TABLE_TWIDDLECOEF_F32_4096) - target_compile_definitions(CMSISDSPTransform PUBLIC ARM_TABLE_TWIDDLECOEF_Q31_4096) - target_compile_definitions(CMSISDSPTransform PUBLIC ARM_TABLE_TWIDDLECOEF_Q15_4096) -if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16)) - target_compile_definitions(CMSISDSPTransform PUBLIC ARM_TABLE_TWIDDLECOEF_F16_4096) -endif() -endif() - -target_sources(CMSISDSPTransform PRIVATE arm_mfcc_init_f32.c) -target_sources(CMSISDSPTransform PRIVATE arm_mfcc_f32.c) - -target_sources(CMSISDSPTransform PRIVATE arm_mfcc_init_q31.c) -target_sources(CMSISDSPTransform PRIVATE arm_mfcc_q31.c) - -target_sources(CMSISDSPTransform PRIVATE arm_mfcc_init_q15.c) -target_sources(CMSISDSPTransform PRIVATE arm_mfcc_q15.c) - -if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16)) -target_sources(CMSISDSPTransform PRIVATE arm_mfcc_init_f16.c) -target_sources(CMSISDSPTransform PRIVATE arm_mfcc_f16.c) -endif() - -### Includes -target_include_directories(CMSISDSPTransform PUBLIC "${DSP}/Include") - - - +cmake_minimum_required (VERSION 3.6)
+
+project(CMSISDSPTransform)
+
+
+
+add_library(CMSISDSPTransform STATIC)
+
+include(fft)
+fft(CMSISDSPTransform)
+
+if (CONFIGTABLE AND ALLFFT)
+target_compile_definitions(CMSISDSPTransform PUBLIC ARM_ALL_FFT_TABLES)
+endif()
+
+target_sources(CMSISDSPTransform PRIVATE arm_bitreversal.c)
+target_sources(CMSISDSPTransform PRIVATE arm_bitreversal2.c)
+
+if (NOT CONFIGTABLE OR ALLFFT OR CFFT_F32_16 OR CFFT_F32_32 OR CFFT_F32_64 OR CFFT_F32_128 OR CFFT_F32_256 OR CFFT_F32_512
+ OR CFFT_F32_1024 OR CFFT_F32_2048 OR CFFT_F32_4096)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix8_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_f32.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q15_16 OR CFFT_Q15_32 OR CFFT_Q15_64 OR CFFT_Q15_128 OR CFFT_Q15_256 OR CFFT_Q15_512
+ OR CFFT_Q15_1024 OR CFFT_Q15_2048 OR CFFT_Q15_4096)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q31_16 OR CFFT_Q31_32 OR CFFT_Q31_64 OR CFFT_Q31_128 OR CFFT_Q31_256 OR CFFT_Q31_512
+ OR CFFT_Q31_1024 OR CFFT_Q31_2048 OR CFFT_Q31_4096)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q31.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_q31.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR DCT4_F32_128 OR DCT4_F32_512 OR DCT4_F32_2048 OR DCT4_F32_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_init_f32.c)
+
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f32.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR DCT4_Q31_128 OR DCT4_Q31_512 OR DCT4_Q31_2048 OR DCT4_Q31_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_init_q31.c)
+
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q31.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR ALLFFT OR DCT4_Q15_128 OR DCT4_Q15_512 OR DCT4_Q15_2048 OR DCT4_Q15_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_init_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_q15.c)
+
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR RFFT_FAST_F32_32 OR RFFT_FAST_F32_64 OR RFFT_FAST_F32_128
+ OR RFFT_FAST_F32_256 OR RFFT_FAST_F32_512 OR RFFT_FAST_F32_1024 OR RFFT_FAST_F32_2048
+ OR RFFT_FAST_F32_4096 )
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_init_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix8_f32.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR RFFT_F32_128 OR RFFT_F32_512 OR RFFT_F32_2048 OR RFFT_F32_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f32.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR RFFT_Q15_32 OR RFFT_Q15_64 OR RFFT_Q15_128 OR RFFT_Q15_256
+ OR RFFT_Q15_512 OR RFFT_Q15_1024 OR RFFT_Q15_2048 OR RFFT_Q15_4096 OR RFFT_Q15_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR RFFT_Q31_32 OR RFFT_Q31_64 OR RFFT_Q31_128 OR RFFT_Q31_256
+ OR RFFT_Q31_512 OR RFFT_Q31_1024 OR RFFT_Q31_2048 OR RFFT_Q31_4096 OR RFFT_Q31_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q31.c)
+endif()
+
+configdsp(CMSISDSPTransform ..)
+
+### Includes
+target_include_directories(CMSISDSPTransform PUBLIC "${DSP}/../../Include")
+
+
+
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/TransformFunctions.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/TransformFunctions.c index f327801..fcd0d95 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/TransformFunctions.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/TransformFunctions.c @@ -1,83 +1,60 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: TransformFunctions.c - * Description: Combination of all transform function source files. - * - * $Date: 18. March 2019 - * $Revision: V1.0.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "arm_bitreversal.c" -#include "arm_bitreversal2.c" -#include "arm_cfft_f32.c" -#include "arm_cfft_f64.c" -#include "arm_cfft_q15.c" -#include "arm_cfft_q31.c" -#include "arm_cfft_init_f32.c" -#include "arm_cfft_init_f64.c" -#include "arm_cfft_init_q15.c" -#include "arm_cfft_init_q31.c" -#include "arm_cfft_radix2_f32.c" -#include "arm_cfft_radix2_q15.c" -#include "arm_cfft_radix2_q31.c" -#include "arm_cfft_radix4_f32.c" -#include "arm_cfft_radix4_q15.c" -#include "arm_cfft_radix4_q31.c" -#include "arm_cfft_radix8_f32.c" -#include "arm_rfft_fast_f32.c" -#include "arm_rfft_fast_f64.c" -#include "arm_rfft_fast_init_f32.c" -#include "arm_rfft_fast_init_f64.c" - -#include "arm_mfcc_init_f32.c" -#include "arm_mfcc_f32.c" - -#include "arm_mfcc_init_q31.c" -#include "arm_mfcc_q31.c" - -#include "arm_mfcc_init_q15.c" -#include "arm_mfcc_q15.c" - -/* Deprecated */ - -#include "arm_dct4_f32.c" -#include "arm_dct4_init_f32.c" -#include "arm_dct4_init_q15.c" -#include "arm_dct4_init_q31.c" -#include "arm_dct4_q15.c" -#include "arm_dct4_q31.c" - -#include "arm_rfft_f32.c" -#include "arm_rfft_q15.c" -#include "arm_rfft_q31.c" - -#include "arm_rfft_init_f32.c" -#include "arm_rfft_init_q15.c" -#include "arm_rfft_init_q31.c" - -#include "arm_cfft_radix4_init_f32.c" -#include "arm_cfft_radix4_init_q15.c" -#include "arm_cfft_radix4_init_q31.c" - -#include "arm_cfft_radix2_init_f32.c" -#include "arm_cfft_radix2_init_q15.c" -#include "arm_cfft_radix2_init_q31.c" +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: TransformFunctions.c
+ * Description: Combination of all transform function source files.
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.0.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_bitreversal.c"
+#include "arm_bitreversal2.c"
+#include "arm_cfft_f32.c"
+#include "arm_cfft_q15.c"
+#include "arm_cfft_q31.c"
+#include "arm_cfft_radix2_f32.c"
+#include "arm_cfft_radix2_init_f32.c"
+#include "arm_cfft_radix2_init_q15.c"
+#include "arm_cfft_radix2_init_q31.c"
+#include "arm_cfft_radix2_q15.c"
+#include "arm_cfft_radix2_q31.c"
+#include "arm_cfft_radix4_f32.c"
+#include "arm_cfft_radix4_init_f32.c"
+#include "arm_cfft_radix4_init_q15.c"
+#include "arm_cfft_radix4_init_q31.c"
+#include "arm_cfft_radix4_q15.c"
+#include "arm_cfft_radix4_q31.c"
+#include "arm_cfft_radix8_f32.c"
+#include "arm_dct4_f32.c"
+#include "arm_dct4_init_f32.c"
+#include "arm_dct4_init_q15.c"
+#include "arm_dct4_init_q31.c"
+#include "arm_dct4_q15.c"
+#include "arm_dct4_q31.c"
+#include "arm_rfft_f32.c"
+#include "arm_rfft_fast_f32.c"
+#include "arm_rfft_fast_init_f32.c"
+#include "arm_rfft_init_f32.c"
+#include "arm_rfft_init_q15.c"
+#include "arm_rfft_init_q31.c"
+#include "arm_rfft_q15.c"
+#include "arm_rfft_q31.c"
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal.c index 687a9e8..de95c59 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal.c @@ -1,230 +1,229 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_bitreversal.c - * Description: Bitreversal functions - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - - -/** - @brief In-place floating-point bit reversal function. - @param[in,out] pSrc points to in-place floating-point data buffer - @param[in] fftSize length of FFT - @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table - @param[in] pBitRevTab points to bit reversal table - @return none - */ - -void arm_bitreversal_f32( - float32_t * pSrc, - uint16_t fftSize, - uint16_t bitRevFactor, - const uint16_t * pBitRevTab) -{ - uint16_t fftLenBy2, fftLenBy2p1; - uint16_t i, j; - float32_t in; - - /* Initializations */ - j = 0U; - fftLenBy2 = fftSize >> 1U; - fftLenBy2p1 = (fftSize >> 1U) + 1U; - - /* Bit Reversal Implementation */ - for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U) - { - if (i < j) - { - /* pSrc[i] <-> pSrc[j]; */ - in = pSrc[2U * i]; - pSrc[2U * i] = pSrc[2U * j]; - pSrc[2U * j] = in; - - /* pSrc[i+1U] <-> pSrc[j+1U] */ - in = pSrc[(2U * i) + 1U]; - pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U]; - pSrc[(2U * j) + 1U] = in; - - /* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */ - in = pSrc[2U * (i + fftLenBy2p1)]; - pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)]; - pSrc[2U * (j + fftLenBy2p1)] = in; - - /* pSrc[i+fftLenBy2p1+1U] <-> pSrc[j+fftLenBy2p1+1U] */ - in = pSrc[(2U * (i + fftLenBy2p1)) + 1U]; - pSrc[(2U * (i + fftLenBy2p1)) + 1U] = - pSrc[(2U * (j + fftLenBy2p1)) + 1U]; - pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in; - - } - - /* pSrc[i+1U] <-> pSrc[j+1U] */ - in = pSrc[2U * (i + 1U)]; - pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)]; - pSrc[2U * (j + fftLenBy2)] = in; - - /* pSrc[i+2U] <-> pSrc[j+2U] */ - in = pSrc[(2U * (i + 1U)) + 1U]; - pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U]; - pSrc[(2U * (j + fftLenBy2)) + 1U] = in; - - /* Reading the index for the bit reversal */ - j = *pBitRevTab; - - /* Updating the bit reversal index depending on the fft length */ - pBitRevTab += bitRevFactor; - } -} - - -/** - @brief In-place Q31 bit reversal function. - @param[in,out] pSrc points to in-place Q31 data buffer. - @param[in] fftLen length of FFT. - @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table - @param[in] pBitRevTab points to bit reversal table - @return none -*/ - -void arm_bitreversal_q31( - q31_t * pSrc, - uint32_t fftLen, - uint16_t bitRevFactor, - const uint16_t * pBitRevTab) -{ - uint32_t fftLenBy2, fftLenBy2p1, i, j; - q31_t in; - - /* Initializations */ - j = 0U; - fftLenBy2 = fftLen / 2U; - fftLenBy2p1 = (fftLen / 2U) + 1U; - - /* Bit Reversal Implementation */ - for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U) - { - if (i < j) - { - /* pSrc[i] <-> pSrc[j]; */ - in = pSrc[2U * i]; - pSrc[2U * i] = pSrc[2U * j]; - pSrc[2U * j] = in; - - /* pSrc[i+1U] <-> pSrc[j+1U] */ - in = pSrc[(2U * i) + 1U]; - pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U]; - pSrc[(2U * j) + 1U] = in; - - /* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */ - in = pSrc[2U * (i + fftLenBy2p1)]; - pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)]; - pSrc[2U * (j + fftLenBy2p1)] = in; - - /* pSrc[i+fftLenBy2p1+1U] <-> pSrc[j+fftLenBy2p1+1U] */ - in = pSrc[(2U * (i + fftLenBy2p1)) + 1U]; - pSrc[(2U * (i + fftLenBy2p1)) + 1U] = - pSrc[(2U * (j + fftLenBy2p1)) + 1U]; - pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in; - - } - - /* pSrc[i+1U] <-> pSrc[j+1U] */ - in = pSrc[2U * (i + 1U)]; - pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)]; - pSrc[2U * (j + fftLenBy2)] = in; - - /* pSrc[i+2U] <-> pSrc[j+2U] */ - in = pSrc[(2U * (i + 1U)) + 1U]; - pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U]; - pSrc[(2U * (j + fftLenBy2)) + 1U] = in; - - /* Reading the index for the bit reversal */ - j = *pBitRevTab; - - /* Updating the bit reversal index depending on the fft length */ - pBitRevTab += bitRevFactor; - } -} - - - -/** - @brief In-place Q15 bit reversal function. - @param[in,out] pSrc16 points to in-place Q15 data buffer - @param[in] fftLen length of FFT - @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table - @param[in] pBitRevTab points to bit reversal table - @return none -*/ - -void arm_bitreversal_q15( - q15_t * pSrc16, - uint32_t fftLen, - uint16_t bitRevFactor, - const uint16_t * pBitRevTab) -{ - q31_t *pSrc = (q31_t *) pSrc16; - q31_t in; - uint32_t fftLenBy2, fftLenBy2p1; - uint32_t i, j; - - /* Initializations */ - j = 0U; - fftLenBy2 = fftLen / 2U; - fftLenBy2p1 = (fftLen / 2U) + 1U; - - /* Bit Reversal Implementation */ - for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U) - { - if (i < j) - { - /* pSrc[i] <-> pSrc[j]; */ - /* pSrc[i+1U] <-> pSrc[j+1U] */ - in = pSrc[i]; - pSrc[i] = pSrc[j]; - pSrc[j] = in; - - /* pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1]; */ - /* pSrc[i + fftLenBy2p1+1U] <-> pSrc[j + fftLenBy2p1+1U] */ - in = pSrc[i + fftLenBy2p1]; - pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1]; - pSrc[j + fftLenBy2p1] = in; - } - - /* pSrc[i+1U] <-> pSrc[j+fftLenBy2]; */ - /* pSrc[i+2] <-> pSrc[j+fftLenBy2+1U] */ - in = pSrc[i + 1U]; - pSrc[i + 1U] = pSrc[j + fftLenBy2]; - pSrc[j + fftLenBy2] = in; - - /* Reading the index for the bit reversal */ - j = *pBitRevTab; - - /* Updating the bit reversal index depending on the fft length */ - pBitRevTab += bitRevFactor; - } -} +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_bitreversal.c
+ * Description: Bitreversal functions
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @brief In-place floating-point bit reversal function.
+ @param[in,out] pSrc points to in-place floating-point data buffer
+ @param[in] fftSize length of FFT
+ @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
+ @param[in] pBitRevTab points to bit reversal table
+ @return none
+ */
+
+void arm_bitreversal_f32(
+ float32_t * pSrc,
+ uint16_t fftSize,
+ uint16_t bitRevFactor,
+ const uint16_t * pBitRevTab)
+{
+ uint16_t fftLenBy2, fftLenBy2p1;
+ uint16_t i, j;
+ float32_t in;
+
+ /* Initializations */
+ j = 0U;
+ fftLenBy2 = fftSize >> 1U;
+ fftLenBy2p1 = (fftSize >> 1U) + 1U;
+
+ /* Bit Reversal Implementation */
+ for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
+ {
+ if (i < j)
+ {
+ /* pSrc[i] <-> pSrc[j]; */
+ in = pSrc[2U * i];
+ pSrc[2U * i] = pSrc[2U * j];
+ pSrc[2U * j] = in;
+
+ /* pSrc[i+1U] <-> pSrc[j+1U] */
+ in = pSrc[(2U * i) + 1U];
+ pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U];
+ pSrc[(2U * j) + 1U] = in;
+
+ /* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
+ in = pSrc[2U * (i + fftLenBy2p1)];
+ pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)];
+ pSrc[2U * (j + fftLenBy2p1)] = in;
+
+ /* pSrc[i+fftLenBy2p1+1U] <-> pSrc[j+fftLenBy2p1+1U] */
+ in = pSrc[(2U * (i + fftLenBy2p1)) + 1U];
+ pSrc[(2U * (i + fftLenBy2p1)) + 1U] =
+ pSrc[(2U * (j + fftLenBy2p1)) + 1U];
+ pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in;
+
+ }
+
+ /* pSrc[i+1U] <-> pSrc[j+1U] */
+ in = pSrc[2U * (i + 1U)];
+ pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)];
+ pSrc[2U * (j + fftLenBy2)] = in;
+
+ /* pSrc[i+2U] <-> pSrc[j+2U] */
+ in = pSrc[(2U * (i + 1U)) + 1U];
+ pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U];
+ pSrc[(2U * (j + fftLenBy2)) + 1U] = in;
+
+ /* Reading the index for the bit reversal */
+ j = *pBitRevTab;
+
+ /* Updating the bit reversal index depending on the fft length */
+ pBitRevTab += bitRevFactor;
+ }
+}
+
+
+/**
+ @brief In-place Q31 bit reversal function.
+ @param[in,out] pSrc points to in-place Q31 data buffer.
+ @param[in] fftLen length of FFT.
+ @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
+ @param[in] pBitRevTab points to bit reversal table
+ @return none
+*/
+
+void arm_bitreversal_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ uint16_t bitRevFactor,
+ const uint16_t * pBitRevTab)
+{
+ uint32_t fftLenBy2, fftLenBy2p1, i, j;
+ q31_t in;
+
+ /* Initializations */
+ j = 0U;
+ fftLenBy2 = fftLen / 2U;
+ fftLenBy2p1 = (fftLen / 2U) + 1U;
+
+ /* Bit Reversal Implementation */
+ for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
+ {
+ if (i < j)
+ {
+ /* pSrc[i] <-> pSrc[j]; */
+ in = pSrc[2U * i];
+ pSrc[2U * i] = pSrc[2U * j];
+ pSrc[2U * j] = in;
+
+ /* pSrc[i+1U] <-> pSrc[j+1U] */
+ in = pSrc[(2U * i) + 1U];
+ pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U];
+ pSrc[(2U * j) + 1U] = in;
+
+ /* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
+ in = pSrc[2U * (i + fftLenBy2p1)];
+ pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)];
+ pSrc[2U * (j + fftLenBy2p1)] = in;
+
+ /* pSrc[i+fftLenBy2p1+1U] <-> pSrc[j+fftLenBy2p1+1U] */
+ in = pSrc[(2U * (i + fftLenBy2p1)) + 1U];
+ pSrc[(2U * (i + fftLenBy2p1)) + 1U] =
+ pSrc[(2U * (j + fftLenBy2p1)) + 1U];
+ pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in;
+
+ }
+
+ /* pSrc[i+1U] <-> pSrc[j+1U] */
+ in = pSrc[2U * (i + 1U)];
+ pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)];
+ pSrc[2U * (j + fftLenBy2)] = in;
+
+ /* pSrc[i+2U] <-> pSrc[j+2U] */
+ in = pSrc[(2U * (i + 1U)) + 1U];
+ pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U];
+ pSrc[(2U * (j + fftLenBy2)) + 1U] = in;
+
+ /* Reading the index for the bit reversal */
+ j = *pBitRevTab;
+
+ /* Updating the bit reversal index depending on the fft length */
+ pBitRevTab += bitRevFactor;
+ }
+}
+
+
+
+/**
+ @brief In-place Q15 bit reversal function.
+ @param[in,out] pSrc16 points to in-place Q15 data buffer
+ @param[in] fftLen length of FFT
+ @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
+ @param[in] pBitRevTab points to bit reversal table
+ @return none
+*/
+
+void arm_bitreversal_q15(
+ q15_t * pSrc16,
+ uint32_t fftLen,
+ uint16_t bitRevFactor,
+ const uint16_t * pBitRevTab)
+{
+ q31_t *pSrc = (q31_t *) pSrc16;
+ q31_t in;
+ uint32_t fftLenBy2, fftLenBy2p1;
+ uint32_t i, j;
+
+ /* Initializations */
+ j = 0U;
+ fftLenBy2 = fftLen / 2U;
+ fftLenBy2p1 = (fftLen / 2U) + 1U;
+
+ /* Bit Reversal Implementation */
+ for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
+ {
+ if (i < j)
+ {
+ /* pSrc[i] <-> pSrc[j]; */
+ /* pSrc[i+1U] <-> pSrc[j+1U] */
+ in = pSrc[i];
+ pSrc[i] = pSrc[j];
+ pSrc[j] = in;
+
+ /* pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1]; */
+ /* pSrc[i + fftLenBy2p1+1U] <-> pSrc[j + fftLenBy2p1+1U] */
+ in = pSrc[i + fftLenBy2p1];
+ pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1];
+ pSrc[j + fftLenBy2p1] = in;
+ }
+
+ /* pSrc[i+1U] <-> pSrc[j+fftLenBy2]; */
+ /* pSrc[i+2] <-> pSrc[j+fftLenBy2+1U] */
+ in = pSrc[i + 1U];
+ pSrc[i + 1U] = pSrc[j + fftLenBy2];
+ pSrc[j + fftLenBy2] = in;
+
+ /* Reading the index for the bit reversal */
+ j = *pBitRevTab;
+
+ /* Updating the bit reversal index depending on the fft length */
+ pBitRevTab += bitRevFactor;
+ }
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.S b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.S index c16091b..01c1e76 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.S +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.S @@ -1,216 +1,216 @@ -;/* ---------------------------------------------------------------------- -; * Project: CMSIS DSP Library -; * Title: arm_bitreversal2.S -; * Description: arm_bitreversal_32 function done in assembly for maximum speed. -; * Called after doing an fft to reorder the output. -; * The function is loop unrolled by 2. arm_bitreversal_16 as well. -; * -; * $Date: 18. March 2019 -; * $Revision: V1.5.2 -; * -; * Target Processor: Cortex-M cores -; * -------------------------------------------------------------------- */ -;/* -; * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved. -; * -; * SPDX-License-Identifier: Apache-2.0 -; * -; * Licensed under the Apache License, Version 2.0 (the License); you may -; * not use this file except in compliance with the License. -; * You may obtain a copy of the License at -; * -; * www.apache.org/licenses/LICENSE-2.0 -; * -; * Unless required by applicable law or agreed to in writing, software -; * distributed under the License is distributed on an AS IS BASIS, WITHOUT -; * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -; * See the License for the specific language governing permissions and -; * limitations under the License. -; */ - -#if defined ( __CC_ARM ) /* Keil */ - #define CODESECT AREA ||.text||, CODE, READONLY, ALIGN=2 - #define LABEL -#elif defined ( __IASMARM__ ) /* IAR */ - #define CODESECT SECTION `.text`:CODE - #define PROC - #define LABEL - #define ENDP - #define EXPORT PUBLIC -#elif defined ( __CSMC__ ) /* Cosmic */ - #define CODESECT switch .text - #define THUMB - #define EXPORT xdef - #define PROC : - #define LABEL : - #define ENDP - #define arm_bitreversal_32 _arm_bitreversal_32 -#elif defined ( __TI_ARM__ ) /* TI ARM */ - #define THUMB .thumb - #define CODESECT .text - #define EXPORT .global - #define PROC : .asmfunc - #define LABEL : - #define ENDP .endasmfunc - #define END -#elif defined ( __GNUC__ ) /* GCC */ - #define THUMB .thumb - #define CODESECT .section .text - #define EXPORT .global - #define PROC : - #define LABEL : - #define ENDP - #define END - - .syntax unified -#endif - - CODESECT - THUMB - -;/** -; @brief In-place bit reversal function. -; @param[in,out] pSrc points to the in-place buffer of unknown 32-bit data type -; @param[in] bitRevLen bit reversal table length -; @param[in] pBitRevTab points to bit reversal table -; @return none -; */ - EXPORT arm_bitreversal_32 - EXPORT arm_bitreversal_16 - -#if defined ( __CC_ARM ) /* Keil */ -#elif defined ( __IASMARM__ ) /* IAR */ -#elif defined ( __CSMC__ ) /* Cosmic */ -#elif defined ( __TI_ARM__ ) /* TI ARM */ -#elif defined ( __GNUC__ ) /* GCC */ - .type arm_bitreversal_16, %function - .type arm_bitreversal_32, %function -#endif - -#if defined (ARM_MATH_CM0_FAMILY) - -arm_bitreversal_32 PROC - ADDS r3,r1,#1 - PUSH {r4-r6} - ADDS r1,r2,#0 - LSRS r3,r3,#1 -arm_bitreversal_32_0 LABEL - LDRH r2,[r1,#2] - LDRH r6,[r1,#0] - ADD r2,r0,r2 - ADD r6,r0,r6 - LDR r5,[r2,#0] - LDR r4,[r6,#0] - STR r5,[r6,#0] - STR r4,[r2,#0] - LDR r5,[r2,#4] - LDR r4,[r6,#4] - STR r5,[r6,#4] - STR r4,[r2,#4] - ADDS r1,r1,#4 - SUBS r3,r3,#1 - BNE arm_bitreversal_32_0 - POP {r4-r6} - BX lr - ENDP - -arm_bitreversal_16 PROC - ADDS r3,r1,#1 - PUSH {r4-r6} - ADDS r1,r2,#0 - LSRS r3,r3,#1 -arm_bitreversal_16_0 LABEL - LDRH r2,[r1,#2] - LDRH r6,[r1,#0] - LSRS r2,r2,#1 - LSRS r6,r6,#1 - ADD r2,r0,r2 - ADD r6,r0,r6 - LDR r5,[r2,#0] - LDR r4,[r6,#0] - STR r5,[r6,#0] - STR r4,[r2,#0] - ADDS r1,r1,#4 - SUBS r3,r3,#1 - BNE arm_bitreversal_16_0 - POP {r4-r6} - BX lr - ENDP - -#else - -arm_bitreversal_32 PROC - ADDS r3,r1,#1 - CMP r3,#1 - IT LS - BXLS lr - PUSH {r4-r9} - ADDS r1,r2,#2 - LSRS r3,r3,#2 -arm_bitreversal_32_0 LABEL ;/* loop unrolled by 2 */ - LDRH r8,[r1,#4] - LDRH r9,[r1,#2] - LDRH r2,[r1,#0] - LDRH r12,[r1,#-2] - ADD r8,r0,r8 - ADD r9,r0,r9 - ADD r2,r0,r2 - ADD r12,r0,r12 - LDR r7,[r9,#0] - LDR r6,[r8,#0] - LDR r5,[r2,#0] - LDR r4,[r12,#0] - STR r6,[r9,#0] - STR r7,[r8,#0] - STR r5,[r12,#0] - STR r4,[r2,#0] - LDR r7,[r9,#4] - LDR r6,[r8,#4] - LDR r5,[r2,#4] - LDR r4,[r12,#4] - STR r6,[r9,#4] - STR r7,[r8,#4] - STR r5,[r12,#4] - STR r4,[r2,#4] - ADDS r1,r1,#8 - SUBS r3,r3,#1 - BNE arm_bitreversal_32_0 - POP {r4-r9} - BX lr - ENDP - -arm_bitreversal_16 PROC - ADDS r3,r1,#1 - CMP r3,#1 - IT LS - BXLS lr - PUSH {r4-r9} - ADDS r1,r2,#2 - LSRS r3,r3,#2 -arm_bitreversal_16_0 LABEL ;/* loop unrolled by 2 */ - LDRH r8,[r1,#4] - LDRH r9,[r1,#2] - LDRH r2,[r1,#0] - LDRH r12,[r1,#-2] - ADD r8,r0,r8,LSR #1 - ADD r9,r0,r9,LSR #1 - ADD r2,r0,r2,LSR #1 - ADD r12,r0,r12,LSR #1 - LDR r7,[r9,#0] - LDR r6,[r8,#0] - LDR r5,[r2,#0] - LDR r4,[r12,#0] - STR r6,[r9,#0] - STR r7,[r8,#0] - STR r5,[r12,#0] - STR r4,[r2,#0] - ADDS r1,r1,#8 - SUBS r3,r3,#1 - BNE arm_bitreversal_16_0 - POP {r4-r9} - BX lr - ENDP - -#endif - - END +;/* ----------------------------------------------------------------------
+; * Project: CMSIS DSP Library
+; * Title: arm_bitreversal2.S
+; * Description: arm_bitreversal_32 function done in assembly for maximum speed.
+; * Called after doing an fft to reorder the output.
+; * The function is loop unrolled by 2. arm_bitreversal_16 as well.
+; *
+; * $Date: 18. March 2019
+; * $Revision: V1.5.2
+; *
+; * Target Processor: Cortex-M cores
+; * -------------------------------------------------------------------- */
+;/*
+; * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+; *
+; * SPDX-License-Identifier: Apache-2.0
+; *
+; * Licensed under the Apache License, Version 2.0 (the License); you may
+; * not use this file except in compliance with the License.
+; * You may obtain a copy of the License at
+; *
+; * www.apache.org/licenses/LICENSE-2.0
+; *
+; * Unless required by applicable law or agreed to in writing, software
+; * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+; * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; * See the License for the specific language governing permissions and
+; * limitations under the License.
+; */
+
+#if defined ( __CC_ARM ) /* Keil */
+ #define CODESECT AREA ||.text||, CODE, READONLY, ALIGN=2
+ #define LABEL
+#elif defined ( __IASMARM__ ) /* IAR */
+ #define CODESECT SECTION `.text`:CODE
+ #define PROC
+ #define LABEL
+ #define ENDP
+ #define EXPORT PUBLIC
+#elif defined ( __CSMC__ ) /* Cosmic */
+ #define CODESECT switch .text
+ #define THUMB
+ #define EXPORT xdef
+ #define PROC :
+ #define LABEL :
+ #define ENDP
+ #define arm_bitreversal_32 _arm_bitreversal_32
+#elif defined ( __TI_ARM__ ) /* TI ARM */
+ #define THUMB .thumb
+ #define CODESECT .text
+ #define EXPORT .global
+ #define PROC : .asmfunc
+ #define LABEL :
+ #define ENDP .endasmfunc
+ #define END
+#elif defined ( __GNUC__ ) /* GCC */
+ #define THUMB .thumb
+ #define CODESECT .section .text
+ #define EXPORT .global
+ #define PROC :
+ #define LABEL :
+ #define ENDP
+ #define END
+
+ .syntax unified
+#endif
+
+ CODESECT
+ THUMB
+
+;/**
+; @brief In-place bit reversal function.
+; @param[in,out] pSrc points to the in-place buffer of unknown 32-bit data type
+; @param[in] bitRevLen bit reversal table length
+; @param[in] pBitRevTab points to bit reversal table
+; @return none
+; */
+ EXPORT arm_bitreversal_32
+ EXPORT arm_bitreversal_16
+
+#if defined ( __CC_ARM ) /* Keil */
+#elif defined ( __IASMARM__ ) /* IAR */
+#elif defined ( __CSMC__ ) /* Cosmic */
+#elif defined ( __TI_ARM__ ) /* TI ARM */
+#elif defined ( __GNUC__ ) /* GCC */
+ .type arm_bitreversal_16, %function
+ .type arm_bitreversal_32, %function
+#endif
+
+#if defined (ARM_MATH_CM0_FAMILY)
+
+arm_bitreversal_32 PROC
+ ADDS r3,r1,#1
+ PUSH {r4-r6}
+ ADDS r1,r2,#0
+ LSRS r3,r3,#1
+arm_bitreversal_32_0 LABEL
+ LDRH r2,[r1,#2]
+ LDRH r6,[r1,#0]
+ ADD r2,r0,r2
+ ADD r6,r0,r6
+ LDR r5,[r2,#0]
+ LDR r4,[r6,#0]
+ STR r5,[r6,#0]
+ STR r4,[r2,#0]
+ LDR r5,[r2,#4]
+ LDR r4,[r6,#4]
+ STR r5,[r6,#4]
+ STR r4,[r2,#4]
+ ADDS r1,r1,#4
+ SUBS r3,r3,#1
+ BNE arm_bitreversal_32_0
+ POP {r4-r6}
+ BX lr
+ ENDP
+
+arm_bitreversal_16 PROC
+ ADDS r3,r1,#1
+ PUSH {r4-r6}
+ ADDS r1,r2,#0
+ LSRS r3,r3,#1
+arm_bitreversal_16_0 LABEL
+ LDRH r2,[r1,#2]
+ LDRH r6,[r1,#0]
+ LSRS r2,r2,#1
+ LSRS r6,r6,#1
+ ADD r2,r0,r2
+ ADD r6,r0,r6
+ LDR r5,[r2,#0]
+ LDR r4,[r6,#0]
+ STR r5,[r6,#0]
+ STR r4,[r2,#0]
+ ADDS r1,r1,#4
+ SUBS r3,r3,#1
+ BNE arm_bitreversal_16_0
+ POP {r4-r6}
+ BX lr
+ ENDP
+
+#else
+
+arm_bitreversal_32 PROC
+ ADDS r3,r1,#1
+ CMP r3,#1
+ IT LS
+ BXLS lr
+ PUSH {r4-r9}
+ ADDS r1,r2,#2
+ LSRS r3,r3,#2
+arm_bitreversal_32_0 LABEL ;/* loop unrolled by 2 */
+ LDRH r8,[r1,#4]
+ LDRH r9,[r1,#2]
+ LDRH r2,[r1,#0]
+ LDRH r12,[r1,#-2]
+ ADD r8,r0,r8
+ ADD r9,r0,r9
+ ADD r2,r0,r2
+ ADD r12,r0,r12
+ LDR r7,[r9,#0]
+ LDR r6,[r8,#0]
+ LDR r5,[r2,#0]
+ LDR r4,[r12,#0]
+ STR r6,[r9,#0]
+ STR r7,[r8,#0]
+ STR r5,[r12,#0]
+ STR r4,[r2,#0]
+ LDR r7,[r9,#4]
+ LDR r6,[r8,#4]
+ LDR r5,[r2,#4]
+ LDR r4,[r12,#4]
+ STR r6,[r9,#4]
+ STR r7,[r8,#4]
+ STR r5,[r12,#4]
+ STR r4,[r2,#4]
+ ADDS r1,r1,#8
+ SUBS r3,r3,#1
+ BNE arm_bitreversal_32_0
+ POP {r4-r9}
+ BX lr
+ ENDP
+
+arm_bitreversal_16 PROC
+ ADDS r3,r1,#1
+ CMP r3,#1
+ IT LS
+ BXLS lr
+ PUSH {r4-r9}
+ ADDS r1,r2,#2
+ LSRS r3,r3,#2
+arm_bitreversal_16_0 LABEL ;/* loop unrolled by 2 */
+ LDRH r8,[r1,#4]
+ LDRH r9,[r1,#2]
+ LDRH r2,[r1,#0]
+ LDRH r12,[r1,#-2]
+ ADD r8,r0,r8,LSR #1
+ ADD r9,r0,r9,LSR #1
+ ADD r2,r0,r2,LSR #1
+ ADD r12,r0,r12,LSR #1
+ LDR r7,[r9,#0]
+ LDR r6,[r8,#0]
+ LDR r5,[r2,#0]
+ LDR r4,[r12,#0]
+ STR r6,[r9,#0]
+ STR r7,[r8,#0]
+ STR r5,[r12,#0]
+ STR r4,[r2,#0]
+ ADDS r1,r1,#8
+ SUBS r3,r3,#1
+ BNE arm_bitreversal_16_0
+ POP {r4-r9}
+ BX lr
+ ENDP
+
+#endif
+
+ END
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.c index 77fac1f..29d5757 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.c @@ -1,134 +1,99 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_bitreversal2.c - * Description: Bitreversal functions - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - - -/** - @brief In-place 64 bit reversal function. - @param[in,out] pSrc points to in-place buffer of unknown 64-bit data type - @param[in] bitRevLen bit reversal table length - @param[in] pBitRevTab points to bit reversal table - @return none -*/ - -void arm_bitreversal_64( - uint64_t *pSrc, - const uint16_t bitRevLen, - const uint16_t *pBitRevTab) -{ - uint64_t a, b, i, tmp; - - for (i = 0; i < bitRevLen; ) - { - a = pBitRevTab[i ] >> 2; - b = pBitRevTab[i + 1] >> 2; - - //real - tmp = pSrc[a]; - pSrc[a] = pSrc[b]; - pSrc[b] = tmp; - - //complex - tmp = pSrc[a+1]; - pSrc[a+1] = pSrc[b+1]; - pSrc[b+1] = tmp; - - i += 2; - } -} - -/** - @brief In-place 32 bit reversal function. - @param[in,out] pSrc points to in-place buffer of unknown 32-bit data type - @param[in] bitRevLen bit reversal table length - @param[in] pBitRevTab points to bit reversal table - @return none -*/ - -void arm_bitreversal_32( - uint32_t *pSrc, - const uint16_t bitRevLen, - const uint16_t *pBitRevTab) -{ - uint32_t a, b, i, tmp; - - for (i = 0; i < bitRevLen; ) - { - a = pBitRevTab[i ] >> 2; - b = pBitRevTab[i + 1] >> 2; - - //real - tmp = pSrc[a]; - pSrc[a] = pSrc[b]; - pSrc[b] = tmp; - - //complex - tmp = pSrc[a+1]; - pSrc[a+1] = pSrc[b+1]; - pSrc[b+1] = tmp; - - i += 2; - } -} - - -/** - @brief In-place 16 bit reversal function. - @param[in,out] pSrc points to in-place buffer of unknown 16-bit data type - @param[in] bitRevLen bit reversal table length - @param[in] pBitRevTab points to bit reversal table - @return none -*/ - -void arm_bitreversal_16( - uint16_t *pSrc, - const uint16_t bitRevLen, - const uint16_t *pBitRevTab) -{ - uint16_t a, b, i, tmp; - - for (i = 0; i < bitRevLen; ) - { - a = pBitRevTab[i ] >> 2; - b = pBitRevTab[i + 1] >> 2; - - //real - tmp = pSrc[a]; - pSrc[a] = pSrc[b]; - pSrc[b] = tmp; - - //complex - tmp = pSrc[a+1]; - pSrc[a+1] = pSrc[b+1]; - pSrc[b+1] = tmp; - - i += 2; - } -} +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_bitreversal2.c
+ * Description: Bitreversal functions
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.0.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @brief In-place 32 bit reversal function.
+ @param[in,out] pSrc points to in-place buffer of unknown 32-bit data type
+ @param[in] bitRevLen bit reversal table length
+ @param[in] pBitRevTab points to bit reversal table
+ @return none
+*/
+
+void arm_bitreversal_32(
+ uint32_t *pSrc,
+ const uint16_t bitRevLen,
+ const uint16_t *pBitRevTab)
+{
+ uint32_t a, b, i, tmp;
+
+ for (i = 0; i < bitRevLen; )
+ {
+ a = pBitRevTab[i ] >> 2;
+ b = pBitRevTab[i + 1] >> 2;
+
+ //real
+ tmp = pSrc[a];
+ pSrc[a] = pSrc[b];
+ pSrc[b] = tmp;
+
+ //complex
+ tmp = pSrc[a+1];
+ pSrc[a+1] = pSrc[b+1];
+ pSrc[b+1] = tmp;
+
+ i += 2;
+ }
+}
+
+
+/**
+ @brief In-place 16 bit reversal function.
+ @param[in,out] pSrc points to in-place buffer of unknown 16-bit data type
+ @param[in] bitRevLen bit reversal table length
+ @param[in] pBitRevTab points to bit reversal table
+ @return none
+*/
+
+void arm_bitreversal_16(
+ uint16_t *pSrc,
+ const uint16_t bitRevLen,
+ const uint16_t *pBitRevTab)
+{
+ uint16_t a, b, i, tmp;
+
+ for (i = 0; i < bitRevLen; )
+ {
+ a = pBitRevTab[i ] >> 2;
+ b = pBitRevTab[i + 1] >> 2;
+
+ //real
+ tmp = pSrc[a];
+ pSrc[a] = pSrc[b];
+ pSrc[b] = tmp;
+
+ //complex
+ tmp = pSrc[a+1];
+ pSrc[a+1] = pSrc[b+1];
+ pSrc[b+1] = tmp;
+
+ i += 2;
+ }
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f32.c index 0b33e8e..15dbb8f 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f32.c @@ -1,1192 +1,629 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_f32.c - * Description: Combined Radix Decimation in Frequency CFFT Floating point processing function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" -#include "arm_vec_fft.h" -#include "arm_mve_tables.h" - - -static float32_t arm_inverse_fft_length_f32(uint16_t fftLen) -{ - float32_t retValue=1.0; - - switch (fftLen) - { - - case 4096U: - retValue = 0.000244140625; - break; - - case 2048U: - retValue = 0.00048828125; - break; - - case 1024U: - retValue = 0.0009765625f; - break; - - case 512U: - retValue = 0.001953125; - break; - - case 256U: - retValue = 0.00390625f; - break; - - case 128U: - retValue = 0.0078125; - break; - - case 64U: - retValue = 0.015625f; - break; - - case 32U: - retValue = 0.03125; - break; - - case 16U: - retValue = 0.0625f; - break; - - - default: - break; - } - return(retValue); -} - - - - -static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float32_t * pSrc, uint32_t fftLen) -{ - f32x4_t vecTmp0, vecTmp1; - f32x4_t vecSum0, vecDiff0, vecSum1, vecDiff1; - f32x4_t vecA, vecB, vecC, vecD; - uint32_t blkCnt; - uint32_t n1, n2; - uint32_t stage = 0; - int32_t iter = 1; - static const int32_t strides[4] = { - (0 - 16) * (int32_t)sizeof(q31_t *), - (1 - 16) * (int32_t)sizeof(q31_t *), - (8 - 16) * (int32_t)sizeof(q31_t *), - (9 - 16) * (int32_t)sizeof(q31_t *) - }; - - n2 = fftLen; - n1 = n2; - n2 >>= 2u; - for (int k = fftLen / 4u; k > 1; k >>= 2) - { - float32_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - float32_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - float32_t const *p_rearranged_twiddle_tab_stride3 = - &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - - float32_t * pBase = pSrc; - for (int i = 0; i < iter; i++) - { - float32_t *inA = pBase; - float32_t *inB = inA + n2 * CMPLX_DIM; - float32_t *inC = inB + n2 * CMPLX_DIM; - float32_t *inD = inC + n2 * CMPLX_DIM; - float32_t const *pW1 = p_rearranged_twiddle_tab_stride1; - float32_t const *pW2 = p_rearranged_twiddle_tab_stride2; - float32_t const *pW3 = p_rearranged_twiddle_tab_stride3; - f32x4_t vecW; - - blkCnt = n2 / 2; - /* - * load 2 f32 complex pair - */ - vecA = vldrwq_f32(inA); - vecC = vldrwq_f32(inC); - while (blkCnt > 0U) - { - vecB = vldrwq_f32(inB); - vecD = vldrwq_f32(inD); - - vecSum0 = vecA + vecC; /* vecSum0 = vaddq(vecA, vecC) */ - vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */ - - vecSum1 = vecB + vecD; - vecDiff1 = vecB - vecD; - /* - * [ 1 1 1 1 ] * [ A B C D ]' .* 1 - */ - vecTmp0 = vecSum0 + vecSum1; - vst1q(inA, vecTmp0); - inA += 4; - - /* - * [ 1 -1 1 -1 ] * [ A B C D ]' - */ - vecTmp0 = vecSum0 - vecSum1; - /* - * [ 1 -1 1 -1 ] * [ A B C D ]'.* W2 - */ - vecW = vld1q(pW2); - pW2 += 4; - vecTmp1 = MVE_CMPLX_MULT_FLT_Conj_AxB(vecW, vecTmp0); - vst1q(inB, vecTmp1); - inB += 4; - - /* - * [ 1 -i -1 +i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 -i -1 +i ] * [ A B C D ]'.* W1 - */ - vecW = vld1q(pW1); - pW1 +=4; - vecTmp1 = MVE_CMPLX_MULT_FLT_Conj_AxB(vecW, vecTmp0); - vst1q(inC, vecTmp1); - inC += 4; - - /* - * [ 1 +i -1 -i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 +i -1 -i ] * [ A B C D ]'.* W3 - */ - vecW = vld1q(pW3); - pW3 += 4; - vecTmp1 = MVE_CMPLX_MULT_FLT_Conj_AxB(vecW, vecTmp0); - vst1q(inD, vecTmp1); - inD += 4; - - vecA = vldrwq_f32(inA); - vecC = vldrwq_f32(inC); - - blkCnt--; - } - pBase += CMPLX_DIM * n1; - } - n1 = n2; - n2 >>= 2u; - iter = iter << 2; - stage++; - } - - /* - * start of Last stage process - */ - uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides); - vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; - - /* load scheduling */ - vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64); - vecC = vldrwq_gather_base_f32(vecScGathAddr, 16); - - blkCnt = (fftLen >> 3); - while (blkCnt > 0U) - { - vecSum0 = vecA + vecC; /* vecSum0 = vaddq(vecA, vecC) */ - vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */ - - vecB = vldrwq_gather_base_f32(vecScGathAddr, 8); - vecD = vldrwq_gather_base_f32(vecScGathAddr, 24); - - vecSum1 = vecB + vecD; - vecDiff1 = vecB - vecD; - - /* pre-load for next iteration */ - vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64); - vecC = vldrwq_gather_base_f32(vecScGathAddr, 16); - - vecTmp0 = vecSum0 + vecSum1; - vstrwq_scatter_base_f32(vecScGathAddr, -64, vecTmp0); - - vecTmp0 = vecSum0 - vecSum1; - vstrwq_scatter_base_f32(vecScGathAddr, -64 + 8, vecTmp0); - - vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1); - vstrwq_scatter_base_f32(vecScGathAddr, -64 + 16, vecTmp0); - - vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1); - vstrwq_scatter_base_f32(vecScGathAddr, -64 + 24, vecTmp0); - - blkCnt--; - } - - /* - * End of last stage process - */ -} - -static void arm_cfft_radix4by2_f32_mve(const arm_cfft_instance_f32 * S, float32_t *pSrc, uint32_t fftLen) -{ - float32_t const *pCoefVec; - float32_t const *pCoef = S->pTwiddle; - float32_t *pIn0, *pIn1; - uint32_t n2; - uint32_t blkCnt; - f32x4_t vecIn0, vecIn1, vecSum, vecDiff; - f32x4_t vecCmplxTmp, vecTw; - - - n2 = fftLen >> 1; - pIn0 = pSrc; - pIn1 = pSrc + fftLen; - pCoefVec = pCoef; - - blkCnt = n2 / 2; - while (blkCnt > 0U) - { - vecIn0 = *(f32x4_t *) pIn0; - vecIn1 = *(f32x4_t *) pIn1; - vecTw = vld1q(pCoefVec); - pCoefVec += 4; - - vecSum = vecIn0 + vecIn1; - vecDiff = vecIn0 - vecIn1; - - vecCmplxTmp = MVE_CMPLX_MULT_FLT_Conj_AxB(vecTw, vecDiff); - - vst1q(pIn0, vecSum); - pIn0 += 4; - vst1q(pIn1, vecCmplxTmp); - pIn1 += 4; - - blkCnt--; - } - - _arm_radix4_butterfly_f32_mve(S, pSrc, n2); - - _arm_radix4_butterfly_f32_mve(S, pSrc + fftLen, n2); - - pIn0 = pSrc; -} - -static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 * S,float32_t * pSrc, uint32_t fftLen, float32_t onebyfftLen) -{ - f32x4_t vecTmp0, vecTmp1; - f32x4_t vecSum0, vecDiff0, vecSum1, vecDiff1; - f32x4_t vecA, vecB, vecC, vecD; - uint32_t blkCnt; - uint32_t n1, n2; - uint32_t stage = 0; - int32_t iter = 1; - static const int32_t strides[4] = { - (0 - 16) * (int32_t)sizeof(q31_t *), - (1 - 16) * (int32_t)sizeof(q31_t *), - (8 - 16) * (int32_t)sizeof(q31_t *), - (9 - 16) * (int32_t)sizeof(q31_t *) - }; - - n2 = fftLen; - n1 = n2; - n2 >>= 2u; - for (int k = fftLen / 4; k > 1; k >>= 2) - { - float32_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - float32_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - float32_t const *p_rearranged_twiddle_tab_stride3 = - &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - - float32_t * pBase = pSrc; - for (int i = 0; i < iter; i++) - { - float32_t *inA = pBase; - float32_t *inB = inA + n2 * CMPLX_DIM; - float32_t *inC = inB + n2 * CMPLX_DIM; - float32_t *inD = inC + n2 * CMPLX_DIM; - float32_t const *pW1 = p_rearranged_twiddle_tab_stride1; - float32_t const *pW2 = p_rearranged_twiddle_tab_stride2; - float32_t const *pW3 = p_rearranged_twiddle_tab_stride3; - f32x4_t vecW; - - blkCnt = n2 / 2; - /* - * load 2 f32 complex pair - */ - vecA = vldrwq_f32(inA); - vecC = vldrwq_f32(inC); - while (blkCnt > 0U) - { - vecB = vldrwq_f32(inB); - vecD = vldrwq_f32(inD); - - vecSum0 = vecA + vecC; /* vecSum0 = vaddq(vecA, vecC) */ - vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */ - - vecSum1 = vecB + vecD; - vecDiff1 = vecB - vecD; - /* - * [ 1 1 1 1 ] * [ A B C D ]' .* 1 - */ - vecTmp0 = vecSum0 + vecSum1; - vst1q(inA, vecTmp0); - inA += 4; - /* - * [ 1 -1 1 -1 ] * [ A B C D ]' - */ - vecTmp0 = vecSum0 - vecSum1; - /* - * [ 1 -1 1 -1 ] * [ A B C D ]'.* W1 - */ - vecW = vld1q(pW2); - pW2 += 4; - vecTmp1 = MVE_CMPLX_MULT_FLT_AxB(vecW, vecTmp0); - vst1q(inB, vecTmp1); - inB += 4; - - /* - * [ 1 -i -1 +i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 -i -1 +i ] * [ A B C D ]'.* W2 - */ - vecW = vld1q(pW1); - pW1 += 4; - vecTmp1 = MVE_CMPLX_MULT_FLT_AxB(vecW, vecTmp0); - vst1q(inC, vecTmp1); - inC += 4; - - /* - * [ 1 +i -1 -i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 +i -1 -i ] * [ A B C D ]'.* W3 - */ - vecW = vld1q(pW3); - pW3 += 4; - vecTmp1 = MVE_CMPLX_MULT_FLT_AxB(vecW, vecTmp0); - vst1q(inD, vecTmp1); - inD += 4; - - vecA = vldrwq_f32(inA); - vecC = vldrwq_f32(inC); - - blkCnt--; - } - pBase += CMPLX_DIM * n1; - } - n1 = n2; - n2 >>= 2u; - iter = iter << 2; - stage++; - } - - /* - * start of Last stage process - */ - uint32x4_t vecScGathAddr = vld1q_u32 ((uint32_t*)strides); - vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; - - /* - * load scheduling - */ - vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64); - vecC = vldrwq_gather_base_f32(vecScGathAddr, 16); - - blkCnt = (fftLen >> 3); - while (blkCnt > 0U) - { - vecSum0 = vecA + vecC; /* vecSum0 = vaddq(vecA, vecC) */ - vecDiff0 = vecA - vecC; /* vecSum0 = vsubq(vecA, vecC) */ - - vecB = vldrwq_gather_base_f32(vecScGathAddr, 8); - vecD = vldrwq_gather_base_f32(vecScGathAddr, 24); - - vecSum1 = vecB + vecD; - vecDiff1 = vecB - vecD; - - vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64); - vecC = vldrwq_gather_base_f32(vecScGathAddr, 16); - - vecTmp0 = vecSum0 + vecSum1; - vecTmp0 = vecTmp0 * onebyfftLen; - vstrwq_scatter_base_f32(vecScGathAddr, -64, vecTmp0); - - vecTmp0 = vecSum0 - vecSum1; - vecTmp0 = vecTmp0 * onebyfftLen; - vstrwq_scatter_base_f32(vecScGathAddr, -64 + 8, vecTmp0); - - vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1); - vecTmp0 = vecTmp0 * onebyfftLen; - vstrwq_scatter_base_f32(vecScGathAddr, -64 + 16, vecTmp0); - - vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1); - vecTmp0 = vecTmp0 * onebyfftLen; - vstrwq_scatter_base_f32(vecScGathAddr, -64 + 24, vecTmp0); - - blkCnt--; - } - - /* - * End of last stage process - */ -} - -static void arm_cfft_radix4by2_inverse_f32_mve(const arm_cfft_instance_f32 * S,float32_t *pSrc, uint32_t fftLen) -{ - float32_t const *pCoefVec; - float32_t const *pCoef = S->pTwiddle; - float32_t *pIn0, *pIn1; - uint32_t n2; - float32_t onebyfftLen = arm_inverse_fft_length_f32(fftLen); - uint32_t blkCnt; - f32x4_t vecIn0, vecIn1, vecSum, vecDiff; - f32x4_t vecCmplxTmp, vecTw; - - - n2 = fftLen >> 1; - pIn0 = pSrc; - pIn1 = pSrc + fftLen; - pCoefVec = pCoef; - - blkCnt = n2 / 2; - while (blkCnt > 0U) - { - vecIn0 = *(f32x4_t *) pIn0; - vecIn1 = *(f32x4_t *) pIn1; - vecTw = vld1q(pCoefVec); - pCoefVec += 4; - - vecSum = vecIn0 + vecIn1; - vecDiff = vecIn0 - vecIn1; - - vecCmplxTmp = MVE_CMPLX_MULT_FLT_AxB(vecTw, vecDiff); - - vst1q(pIn0, vecSum); - pIn0 += 4; - vst1q(pIn1, vecCmplxTmp); - pIn1 += 4; - - blkCnt--; - } - - _arm_radix4_butterfly_inverse_f32_mve(S, pSrc, n2, onebyfftLen); - - _arm_radix4_butterfly_inverse_f32_mve(S, pSrc + fftLen, n2, onebyfftLen); -} - - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Processing function for the floating-point complex FFT. - @param[in] S points to an instance of the floating-point CFFT structure - @param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return none - */ - - -void arm_cfft_f32( - const arm_cfft_instance_f32 * S, - float32_t * pSrc, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - uint32_t fftLen = S->fftLen; - - if (ifftFlag == 1U) { - - switch (fftLen) { - case 16: - case 64: - case 256: - case 1024: - case 4096: - _arm_radix4_butterfly_inverse_f32_mve(S, pSrc, fftLen, arm_inverse_fft_length_f32(S->fftLen)); - break; - - case 32: - case 128: - case 512: - case 2048: - arm_cfft_radix4by2_inverse_f32_mve(S, pSrc, fftLen); - break; - } - } else { - switch (fftLen) { - case 16: - case 64: - case 256: - case 1024: - case 4096: - _arm_radix4_butterfly_f32_mve(S, pSrc, fftLen); - break; - - case 32: - case 128: - case 512: - case 2048: - arm_cfft_radix4by2_f32_mve(S, pSrc, fftLen); - break; - } - } - - - if (bitReverseFlag) - { - - arm_bitreversal_32_inpl_mve((uint32_t*)pSrc, S->bitRevLength, S->pBitRevTable); - - } -} - - -#else -extern void arm_radix8_butterfly_f32( - float32_t * pSrc, - uint16_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier); - -extern void arm_bitreversal_32( - uint32_t * pSrc, - const uint16_t bitRevLen, - const uint16_t * pBitRevTable); - -/** - @ingroup groupTransforms - */ - -/** - @defgroup ComplexFFT Complex FFT Functions - - @par - The Fast Fourier Transform (FFT) is an efficient algorithm for computing the - Discrete Fourier Transform (DFT). The FFT can be orders of magnitude faster - than the DFT, especially for long lengths. - The algorithms described in this section - operate on complex data. A separate set of functions is devoted to handling - of real sequences. - @par - There are separate algorithms for handling floating-point, Q15, and Q31 data - types. The algorithms available for each data type are described next. - @par - The FFT functions operate in-place. That is, the array holding the input data - will also be used to hold the corresponding result. The input data is complex - and contains <code>2*fftLen</code> interleaved values as shown below. - <pre>{real[0], imag[0], real[1], imag[1], ...} </pre> - The FFT result will be contained in the same array and the frequency domain - values will have the same interleaving. - - @par Floating-point - The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-8 - stages are performed along with a single radix-2 or radix-4 stage, as needed. - The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses - a different twiddle factor table. - @par - The function uses the standard FFT definition and output values may grow by a - factor of <code>fftLen</code> when computing the forward transform. The - inverse transform includes a scale of <code>1/fftLen</code> as part of the - calculation and this matches the textbook definition of the inverse FFT. - @par - For the MVE version, the new arm_cfft_init_f32 initialization function is - <b>mandatory</b>. <b>Compilation flags are available to include only the required tables for the - needed FFTs.</b> Other FFT versions can continue to be initialized as - explained below. - @par - For not MVE versions, pre-initialized data structures containing twiddle factors - and bit reversal tables are provided and defined in <code>arm_const_structs.h</code>. Include - this header in your function and then pass one of the constant structures as - an argument to arm_cfft_f32. For example: - @par - <code>arm_cfft_f32(arm_cfft_sR_f32_len64, pSrc, 1, 1)</code> - @par - computes a 64-point inverse complex FFT including bit reversal. - The data structures are treated as constant data and not modified during the - calculation. The same data structure can be reused for multiple transforms - including mixing forward and inverse transforms. - @par - Earlier releases of the library provided separate radix-2 and radix-4 - algorithms that operated on floating-point data. These functions are still - provided but are deprecated. The older functions are slower and less general - than the new functions. - @par - An example of initialization of the constants for the arm_cfft_f32 function follows: - @code - const static arm_cfft_instance_f32 *S; - ... - switch (length) { - case 16: - S = &arm_cfft_sR_f32_len16; - break; - case 32: - S = &arm_cfft_sR_f32_len32; - break; - case 64: - S = &arm_cfft_sR_f32_len64; - break; - case 128: - S = &arm_cfft_sR_f32_len128; - break; - case 256: - S = &arm_cfft_sR_f32_len256; - break; - case 512: - S = &arm_cfft_sR_f32_len512; - break; - case 1024: - S = &arm_cfft_sR_f32_len1024; - break; - case 2048: - S = &arm_cfft_sR_f32_len2048; - break; - case 4096: - S = &arm_cfft_sR_f32_len4096; - break; - } - @endcode - @par - The new arm_cfft_init_f32 can also be used. - @par Q15 and Q31 - The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-4 - stages are performed along with a single radix-2 stage, as needed. - The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses - a different twiddle factor table. - @par - The function uses the standard FFT definition and output values may grow by a - factor of <code>fftLen</code> when computing the forward transform. The - inverse transform includes a scale of <code>1/fftLen</code> as part of the - calculation and this matches the textbook definition of the inverse FFT. - @par - Pre-initialized data structures containing twiddle factors and bit reversal - tables are provided and defined in <code>arm_const_structs.h</code>. Include - this header in your function and then pass one of the constant structures as - an argument to arm_cfft_q31. For example: - @par - <code>arm_cfft_q31(arm_cfft_sR_q31_len64, pSrc, 1, 1)</code> - @par - computes a 64-point inverse complex FFT including bit reversal. - The data structures are treated as constant data and not modified during the - calculation. The same data structure can be reused for multiple transforms - including mixing forward and inverse transforms. - @par - Earlier releases of the library provided separate radix-2 and radix-4 - algorithms that operated on floating-point data. These functions are still - provided but are deprecated. The older functions are slower and less general - than the new functions. - @par - An example of initialization of the constants for the arm_cfft_q31 function follows: - @code - const static arm_cfft_instance_q31 *S; - ... - switch (length) { - case 16: - S = &arm_cfft_sR_q31_len16; - break; - case 32: - S = &arm_cfft_sR_q31_len32; - break; - case 64: - S = &arm_cfft_sR_q31_len64; - break; - case 128: - S = &arm_cfft_sR_q31_len128; - break; - case 256: - S = &arm_cfft_sR_q31_len256; - break; - case 512: - S = &arm_cfft_sR_q31_len512; - break; - case 1024: - S = &arm_cfft_sR_q31_len1024; - break; - case 2048: - S = &arm_cfft_sR_q31_len2048; - break; - case 4096: - S = &arm_cfft_sR_q31_len4096; - break; - } - @endcode - - */ - -void arm_cfft_radix8by2_f32 (arm_cfft_instance_f32 * S, float32_t * p1) -{ - uint32_t L = S->fftLen; - float32_t * pCol1, * pCol2, * pMid1, * pMid2; - float32_t * p2 = p1 + L; - const float32_t * tw = (float32_t *) S->pTwiddle; - float32_t t1[4], t2[4], t3[4], t4[4], twR, twI; - float32_t m0, m1, m2, m3; - uint32_t l; - - pCol1 = p1; - pCol2 = p2; - - /* Define new length */ - L >>= 1; - - /* Initialize mid pointers */ - pMid1 = p1 + L; - pMid2 = p2 + L; - - /* do two dot Fourier transform */ - for (l = L >> 2; l > 0; l-- ) - { - t1[0] = p1[0]; - t1[1] = p1[1]; - t1[2] = p1[2]; - t1[3] = p1[3]; - - t2[0] = p2[0]; - t2[1] = p2[1]; - t2[2] = p2[2]; - t2[3] = p2[3]; - - t3[0] = pMid1[0]; - t3[1] = pMid1[1]; - t3[2] = pMid1[2]; - t3[3] = pMid1[3]; - - t4[0] = pMid2[0]; - t4[1] = pMid2[1]; - t4[2] = pMid2[2]; - t4[3] = pMid2[3]; - - *p1++ = t1[0] + t2[0]; - *p1++ = t1[1] + t2[1]; - *p1++ = t1[2] + t2[2]; - *p1++ = t1[3] + t2[3]; /* col 1 */ - - t2[0] = t1[0] - t2[0]; - t2[1] = t1[1] - t2[1]; - t2[2] = t1[2] - t2[2]; - t2[3] = t1[3] - t2[3]; /* for col 2 */ - - *pMid1++ = t3[0] + t4[0]; - *pMid1++ = t3[1] + t4[1]; - *pMid1++ = t3[2] + t4[2]; - *pMid1++ = t3[3] + t4[3]; /* col 1 */ - - t4[0] = t4[0] - t3[0]; - t4[1] = t4[1] - t3[1]; - t4[2] = t4[2] - t3[2]; - t4[3] = t4[3] - t3[3]; /* for col 2 */ - - twR = *tw++; - twI = *tw++; - - /* multiply by twiddle factors */ - m0 = t2[0] * twR; - m1 = t2[1] * twI; - m2 = t2[1] * twR; - m3 = t2[0] * twI; - - /* R = R * Tr - I * Ti */ - *p2++ = m0 + m1; - /* I = I * Tr + R * Ti */ - *p2++ = m2 - m3; - - /* use vertical symmetry */ - /* 0.9988 - 0.0491i <==> -0.0491 - 0.9988i */ - m0 = t4[0] * twI; - m1 = t4[1] * twR; - m2 = t4[1] * twI; - m3 = t4[0] * twR; - - *pMid2++ = m0 - m1; - *pMid2++ = m2 + m3; - - twR = *tw++; - twI = *tw++; - - m0 = t2[2] * twR; - m1 = t2[3] * twI; - m2 = t2[3] * twR; - m3 = t2[2] * twI; - - *p2++ = m0 + m1; - *p2++ = m2 - m3; - - m0 = t4[2] * twI; - m1 = t4[3] * twR; - m2 = t4[3] * twI; - m3 = t4[2] * twR; - - *pMid2++ = m0 - m1; - *pMid2++ = m2 + m3; - } - - /* first col */ - arm_radix8_butterfly_f32 (pCol1, L, (float32_t *) S->pTwiddle, 2U); - - /* second col */ - arm_radix8_butterfly_f32 (pCol2, L, (float32_t *) S->pTwiddle, 2U); -} - -void arm_cfft_radix8by4_f32 (arm_cfft_instance_f32 * S, float32_t * p1) -{ - uint32_t L = S->fftLen >> 1; - float32_t * pCol1, *pCol2, *pCol3, *pCol4, *pEnd1, *pEnd2, *pEnd3, *pEnd4; - const float32_t *tw2, *tw3, *tw4; - float32_t * p2 = p1 + L; - float32_t * p3 = p2 + L; - float32_t * p4 = p3 + L; - float32_t t2[4], t3[4], t4[4], twR, twI; - float32_t p1ap3_0, p1sp3_0, p1ap3_1, p1sp3_1; - float32_t m0, m1, m2, m3; - uint32_t l, twMod2, twMod3, twMod4; - - pCol1 = p1; /* points to real values by default */ - pCol2 = p2; - pCol3 = p3; - pCol4 = p4; - pEnd1 = p2 - 1; /* points to imaginary values by default */ - pEnd2 = p3 - 1; - pEnd3 = p4 - 1; - pEnd4 = pEnd3 + L; - - tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle; - - L >>= 1; - - /* do four dot Fourier transform */ - - twMod2 = 2; - twMod3 = 4; - twMod4 = 6; - - /* TOP */ - p1ap3_0 = p1[0] + p3[0]; - p1sp3_0 = p1[0] - p3[0]; - p1ap3_1 = p1[1] + p3[1]; - p1sp3_1 = p1[1] - p3[1]; - - /* col 2 */ - t2[0] = p1sp3_0 + p2[1] - p4[1]; - t2[1] = p1sp3_1 - p2[0] + p4[0]; - /* col 3 */ - t3[0] = p1ap3_0 - p2[0] - p4[0]; - t3[1] = p1ap3_1 - p2[1] - p4[1]; - /* col 4 */ - t4[0] = p1sp3_0 - p2[1] + p4[1]; - t4[1] = p1sp3_1 + p2[0] - p4[0]; - /* col 1 */ - *p1++ = p1ap3_0 + p2[0] + p4[0]; - *p1++ = p1ap3_1 + p2[1] + p4[1]; - - /* Twiddle factors are ones */ - *p2++ = t2[0]; - *p2++ = t2[1]; - *p3++ = t3[0]; - *p3++ = t3[1]; - *p4++ = t4[0]; - *p4++ = t4[1]; - - tw2 += twMod2; - tw3 += twMod3; - tw4 += twMod4; - - for (l = (L - 2) >> 1; l > 0; l-- ) - { - /* TOP */ - p1ap3_0 = p1[0] + p3[0]; - p1sp3_0 = p1[0] - p3[0]; - p1ap3_1 = p1[1] + p3[1]; - p1sp3_1 = p1[1] - p3[1]; - /* col 2 */ - t2[0] = p1sp3_0 + p2[1] - p4[1]; - t2[1] = p1sp3_1 - p2[0] + p4[0]; - /* col 3 */ - t3[0] = p1ap3_0 - p2[0] - p4[0]; - t3[1] = p1ap3_1 - p2[1] - p4[1]; - /* col 4 */ - t4[0] = p1sp3_0 - p2[1] + p4[1]; - t4[1] = p1sp3_1 + p2[0] - p4[0]; - /* col 1 - top */ - *p1++ = p1ap3_0 + p2[0] + p4[0]; - *p1++ = p1ap3_1 + p2[1] + p4[1]; - - /* BOTTOM */ - p1ap3_1 = pEnd1[-1] + pEnd3[-1]; - p1sp3_1 = pEnd1[-1] - pEnd3[-1]; - p1ap3_0 = pEnd1[ 0] + pEnd3[0]; - p1sp3_0 = pEnd1[ 0] - pEnd3[0]; - /* col 2 */ - t2[2] = pEnd2[0] - pEnd4[0] + p1sp3_1; - t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1]; - /* col 3 */ - t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1]; - t3[3] = p1ap3_0 - pEnd2[ 0] - pEnd4[ 0]; - /* col 4 */ - t4[2] = pEnd2[ 0] - pEnd4[ 0] - p1sp3_1; - t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0; - /* col 1 - Bottom */ - *pEnd1-- = p1ap3_0 + pEnd2[ 0] + pEnd4[ 0]; - *pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1]; - - /* COL 2 */ - /* read twiddle factors */ - twR = *tw2++; - twI = *tw2++; - /* multiply by twiddle factors */ - /* let Z1 = a + i(b), Z2 = c + i(d) */ - /* => Z1 * Z2 = (a*c - b*d) + i(b*c + a*d) */ - - /* Top */ - m0 = t2[0] * twR; - m1 = t2[1] * twI; - m2 = t2[1] * twR; - m3 = t2[0] * twI; - - *p2++ = m0 + m1; - *p2++ = m2 - m3; - /* use vertical symmetry col 2 */ - /* 0.9997 - 0.0245i <==> 0.0245 - 0.9997i */ - /* Bottom */ - m0 = t2[3] * twI; - m1 = t2[2] * twR; - m2 = t2[2] * twI; - m3 = t2[3] * twR; - - *pEnd2-- = m0 - m1; - *pEnd2-- = m2 + m3; - - /* COL 3 */ - twR = tw3[0]; - twI = tw3[1]; - tw3 += twMod3; - /* Top */ - m0 = t3[0] * twR; - m1 = t3[1] * twI; - m2 = t3[1] * twR; - m3 = t3[0] * twI; - - *p3++ = m0 + m1; - *p3++ = m2 - m3; - /* use vertical symmetry col 3 */ - /* 0.9988 - 0.0491i <==> -0.9988 - 0.0491i */ - /* Bottom */ - m0 = -t3[3] * twR; - m1 = t3[2] * twI; - m2 = t3[2] * twR; - m3 = t3[3] * twI; - - *pEnd3-- = m0 - m1; - *pEnd3-- = m3 - m2; - - /* COL 4 */ - twR = tw4[0]; - twI = tw4[1]; - tw4 += twMod4; - /* Top */ - m0 = t4[0] * twR; - m1 = t4[1] * twI; - m2 = t4[1] * twR; - m3 = t4[0] * twI; - - *p4++ = m0 + m1; - *p4++ = m2 - m3; - /* use vertical symmetry col 4 */ - /* 0.9973 - 0.0736i <==> -0.0736 + 0.9973i */ - /* Bottom */ - m0 = t4[3] * twI; - m1 = t4[2] * twR; - m2 = t4[2] * twI; - m3 = t4[3] * twR; - - *pEnd4-- = m0 - m1; - *pEnd4-- = m2 + m3; - } - - /* MIDDLE */ - /* Twiddle factors are */ - /* 1.0000 0.7071-0.7071i -1.0000i -0.7071-0.7071i */ - p1ap3_0 = p1[0] + p3[0]; - p1sp3_0 = p1[0] - p3[0]; - p1ap3_1 = p1[1] + p3[1]; - p1sp3_1 = p1[1] - p3[1]; - - /* col 2 */ - t2[0] = p1sp3_0 + p2[1] - p4[1]; - t2[1] = p1sp3_1 - p2[0] + p4[0]; - /* col 3 */ - t3[0] = p1ap3_0 - p2[0] - p4[0]; - t3[1] = p1ap3_1 - p2[1] - p4[1]; - /* col 4 */ - t4[0] = p1sp3_0 - p2[1] + p4[1]; - t4[1] = p1sp3_1 + p2[0] - p4[0]; - /* col 1 - Top */ - *p1++ = p1ap3_0 + p2[0] + p4[0]; - *p1++ = p1ap3_1 + p2[1] + p4[1]; - - /* COL 2 */ - twR = tw2[0]; - twI = tw2[1]; - - m0 = t2[0] * twR; - m1 = t2[1] * twI; - m2 = t2[1] * twR; - m3 = t2[0] * twI; - - *p2++ = m0 + m1; - *p2++ = m2 - m3; - /* COL 3 */ - twR = tw3[0]; - twI = tw3[1]; - - m0 = t3[0] * twR; - m1 = t3[1] * twI; - m2 = t3[1] * twR; - m3 = t3[0] * twI; - - *p3++ = m0 + m1; - *p3++ = m2 - m3; - /* COL 4 */ - twR = tw4[0]; - twI = tw4[1]; - - m0 = t4[0] * twR; - m1 = t4[1] * twI; - m2 = t4[1] * twR; - m3 = t4[0] * twI; - - *p4++ = m0 + m1; - *p4++ = m2 - m3; - - /* first col */ - arm_radix8_butterfly_f32 (pCol1, L, (float32_t *) S->pTwiddle, 4U); - - /* second col */ - arm_radix8_butterfly_f32 (pCol2, L, (float32_t *) S->pTwiddle, 4U); - - /* third col */ - arm_radix8_butterfly_f32 (pCol3, L, (float32_t *) S->pTwiddle, 4U); - - /* fourth col */ - arm_radix8_butterfly_f32 (pCol4, L, (float32_t *) S->pTwiddle, 4U); -} - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Processing function for the floating-point complex FFT. - @param[in] S points to an instance of the floating-point CFFT structure - @param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return none - */ - -void arm_cfft_f32( - const arm_cfft_instance_f32 * S, - float32_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - uint32_t L = S->fftLen, l; - float32_t invL, * pSrc; - - if (ifftFlag == 1U) - { - /* Conjugate input data */ - pSrc = p1 + 1; - for (l = 0; l < L; l++) - { - *pSrc = -*pSrc; - pSrc += 2; - } - } - - switch (L) - { - case 16: - case 128: - case 1024: - arm_cfft_radix8by2_f32 ( (arm_cfft_instance_f32 *) S, p1); - break; - case 32: - case 256: - case 2048: - arm_cfft_radix8by4_f32 ( (arm_cfft_instance_f32 *) S, p1); - break; - case 64: - case 512: - case 4096: - arm_radix8_butterfly_f32 ( p1, L, (float32_t *) S->pTwiddle, 1); - break; - } - - if ( bitReverseFlag ) - arm_bitreversal_32 ((uint32_t*) p1, S->bitRevLength, S->pBitRevTable); - - if (ifftFlag == 1U) - { - invL = 1.0f / (float32_t)L; - - /* Conjugate and scale output data */ - pSrc = p1; - for (l= 0; l < L; l++) - { - *pSrc++ *= invL ; - *pSrc = -(*pSrc) * invL; - pSrc++; - } - } -} -#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ - -/** - @} end of ComplexFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_f32.c
+ * Description: Combined Radix Decimation in Frequency CFFT Floating point processing function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+extern void arm_radix8_butterfly_f32(
+ float32_t * pSrc,
+ uint16_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier);
+
+extern void arm_bitreversal_32(
+ uint32_t * pSrc,
+ const uint16_t bitRevLen,
+ const uint16_t * pBitRevTable);
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @defgroup ComplexFFT Complex FFT Functions
+
+ @par
+ The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
+ Discrete Fourier Transform (DFT). The FFT can be orders of magnitude faster
+ than the DFT, especially for long lengths.
+ The algorithms described in this section
+ operate on complex data. A separate set of functions is devoted to handling
+ of real sequences.
+ @par
+ There are separate algorithms for handling floating-point, Q15, and Q31 data
+ types. The algorithms available for each data type are described next.
+ @par
+ The FFT functions operate in-place. That is, the array holding the input data
+ will also be used to hold the corresponding result. The input data is complex
+ and contains <code>2*fftLen</code> interleaved values as shown below.
+ <pre>{real[0], imag[0], real[1], imag[1], ...} </pre>
+ The FFT result will be contained in the same array and the frequency domain
+ values will have the same interleaving.
+
+ @par Floating-point
+ The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-8
+ stages are performed along with a single radix-2 or radix-4 stage, as needed.
+ The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
+ a different twiddle factor table.
+ @par
+ The function uses the standard FFT definition and output values may grow by a
+ factor of <code>fftLen</code> when computing the forward transform. The
+ inverse transform includes a scale of <code>1/fftLen</code> as part of the
+ calculation and this matches the textbook definition of the inverse FFT.
+ @par
+ Pre-initialized data structures containing twiddle factors and bit reversal
+ tables are provided and defined in <code>arm_const_structs.h</code>. Include
+ this header in your function and then pass one of the constant structures as
+ an argument to arm_cfft_f32. For example:
+ @par
+ <code>arm_cfft_f32(arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
+ @par
+ computes a 64-point inverse complex FFT including bit reversal.
+ The data structures are treated as constant data and not modified during the
+ calculation. The same data structure can be reused for multiple transforms
+ including mixing forward and inverse transforms.
+ @par
+ Earlier releases of the library provided separate radix-2 and radix-4
+ algorithms that operated on floating-point data. These functions are still
+ provided but are deprecated. The older functions are slower and less general
+ than the new functions.
+ @par
+ An example of initialization of the constants for the arm_cfft_f32 function follows:
+ @code
+ const static arm_cfft_instance_f32 *S;
+ ...
+ switch (length) {
+ case 16:
+ S = &arm_cfft_sR_f32_len16;
+ break;
+ case 32:
+ S = &arm_cfft_sR_f32_len32;
+ break;
+ case 64:
+ S = &arm_cfft_sR_f32_len64;
+ break;
+ case 128:
+ S = &arm_cfft_sR_f32_len128;
+ break;
+ case 256:
+ S = &arm_cfft_sR_f32_len256;
+ break;
+ case 512:
+ S = &arm_cfft_sR_f32_len512;
+ break;
+ case 1024:
+ S = &arm_cfft_sR_f32_len1024;
+ break;
+ case 2048:
+ S = &arm_cfft_sR_f32_len2048;
+ break;
+ case 4096:
+ S = &arm_cfft_sR_f32_len4096;
+ break;
+ }
+ @endcode
+ @par Q15 and Q31
+ The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-4
+ stages are performed along with a single radix-2 stage, as needed.
+ The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
+ a different twiddle factor table.
+ @par
+ The function uses the standard FFT definition and output values may grow by a
+ factor of <code>fftLen</code> when computing the forward transform. The
+ inverse transform includes a scale of <code>1/fftLen</code> as part of the
+ calculation and this matches the textbook definition of the inverse FFT.
+ @par
+ Pre-initialized data structures containing twiddle factors and bit reversal
+ tables are provided and defined in <code>arm_const_structs.h</code>. Include
+ this header in your function and then pass one of the constant structures as
+ an argument to arm_cfft_q31. For example:
+ @par
+ <code>arm_cfft_q31(arm_cfft_sR_q31_len64, pSrc, 1, 1)</code>
+ @par
+ computes a 64-point inverse complex FFT including bit reversal.
+ The data structures are treated as constant data and not modified during the
+ calculation. The same data structure can be reused for multiple transforms
+ including mixing forward and inverse transforms.
+ @par
+ Earlier releases of the library provided separate radix-2 and radix-4
+ algorithms that operated on floating-point data. These functions are still
+ provided but are deprecated. The older functions are slower and less general
+ than the new functions.
+ @par
+ An example of initialization of the constants for the arm_cfft_q31 function follows:
+ @code
+ const static arm_cfft_instance_q31 *S;
+ ...
+ switch (length) {
+ case 16:
+ S = &arm_cfft_sR_q31_len16;
+ break;
+ case 32:
+ S = &arm_cfft_sR_q31_len32;
+ break;
+ case 64:
+ S = &arm_cfft_sR_q31_len64;
+ break;
+ case 128:
+ S = &arm_cfft_sR_q31_len128;
+ break;
+ case 256:
+ S = &arm_cfft_sR_q31_len256;
+ break;
+ case 512:
+ S = &arm_cfft_sR_q31_len512;
+ break;
+ case 1024:
+ S = &arm_cfft_sR_q31_len1024;
+ break;
+ case 2048:
+ S = &arm_cfft_sR_q31_len2048;
+ break;
+ case 4096:
+ S = &arm_cfft_sR_q31_len4096;
+ break;
+ }
+ @endcode
+
+ */
+
+void arm_cfft_radix8by2_f32 (arm_cfft_instance_f32 * S, float32_t * p1)
+{
+ uint32_t L = S->fftLen;
+ float32_t * pCol1, * pCol2, * pMid1, * pMid2;
+ float32_t * p2 = p1 + L;
+ const float32_t * tw = (float32_t *) S->pTwiddle;
+ float32_t t1[4], t2[4], t3[4], t4[4], twR, twI;
+ float32_t m0, m1, m2, m3;
+ uint32_t l;
+
+ pCol1 = p1;
+ pCol2 = p2;
+
+ /* Define new length */
+ L >>= 1;
+
+ /* Initialize mid pointers */
+ pMid1 = p1 + L;
+ pMid2 = p2 + L;
+
+ /* do two dot Fourier transform */
+ for (l = L >> 2; l > 0; l-- )
+ {
+ t1[0] = p1[0];
+ t1[1] = p1[1];
+ t1[2] = p1[2];
+ t1[3] = p1[3];
+
+ t2[0] = p2[0];
+ t2[1] = p2[1];
+ t2[2] = p2[2];
+ t2[3] = p2[3];
+
+ t3[0] = pMid1[0];
+ t3[1] = pMid1[1];
+ t3[2] = pMid1[2];
+ t3[3] = pMid1[3];
+
+ t4[0] = pMid2[0];
+ t4[1] = pMid2[1];
+ t4[2] = pMid2[2];
+ t4[3] = pMid2[3];
+
+ *p1++ = t1[0] + t2[0];
+ *p1++ = t1[1] + t2[1];
+ *p1++ = t1[2] + t2[2];
+ *p1++ = t1[3] + t2[3]; /* col 1 */
+
+ t2[0] = t1[0] - t2[0];
+ t2[1] = t1[1] - t2[1];
+ t2[2] = t1[2] - t2[2];
+ t2[3] = t1[3] - t2[3]; /* for col 2 */
+
+ *pMid1++ = t3[0] + t4[0];
+ *pMid1++ = t3[1] + t4[1];
+ *pMid1++ = t3[2] + t4[2];
+ *pMid1++ = t3[3] + t4[3]; /* col 1 */
+
+ t4[0] = t4[0] - t3[0];
+ t4[1] = t4[1] - t3[1];
+ t4[2] = t4[2] - t3[2];
+ t4[3] = t4[3] - t3[3]; /* for col 2 */
+
+ twR = *tw++;
+ twI = *tw++;
+
+ /* multiply by twiddle factors */
+ m0 = t2[0] * twR;
+ m1 = t2[1] * twI;
+ m2 = t2[1] * twR;
+ m3 = t2[0] * twI;
+
+ /* R = R * Tr - I * Ti */
+ *p2++ = m0 + m1;
+ /* I = I * Tr + R * Ti */
+ *p2++ = m2 - m3;
+
+ /* use vertical symmetry */
+ /* 0.9988 - 0.0491i <==> -0.0491 - 0.9988i */
+ m0 = t4[0] * twI;
+ m1 = t4[1] * twR;
+ m2 = t4[1] * twI;
+ m3 = t4[0] * twR;
+
+ *pMid2++ = m0 - m1;
+ *pMid2++ = m2 + m3;
+
+ twR = *tw++;
+ twI = *tw++;
+
+ m0 = t2[2] * twR;
+ m1 = t2[3] * twI;
+ m2 = t2[3] * twR;
+ m3 = t2[2] * twI;
+
+ *p2++ = m0 + m1;
+ *p2++ = m2 - m3;
+
+ m0 = t4[2] * twI;
+ m1 = t4[3] * twR;
+ m2 = t4[3] * twI;
+ m3 = t4[2] * twR;
+
+ *pMid2++ = m0 - m1;
+ *pMid2++ = m2 + m3;
+ }
+
+ /* first col */
+ arm_radix8_butterfly_f32 (pCol1, L, (float32_t *) S->pTwiddle, 2U);
+
+ /* second col */
+ arm_radix8_butterfly_f32 (pCol2, L, (float32_t *) S->pTwiddle, 2U);
+}
+
+void arm_cfft_radix8by4_f32 (arm_cfft_instance_f32 * S, float32_t * p1)
+{
+ uint32_t L = S->fftLen >> 1;
+ float32_t * pCol1, *pCol2, *pCol3, *pCol4, *pEnd1, *pEnd2, *pEnd3, *pEnd4;
+ const float32_t *tw2, *tw3, *tw4;
+ float32_t * p2 = p1 + L;
+ float32_t * p3 = p2 + L;
+ float32_t * p4 = p3 + L;
+ float32_t t2[4], t3[4], t4[4], twR, twI;
+ float32_t p1ap3_0, p1sp3_0, p1ap3_1, p1sp3_1;
+ float32_t m0, m1, m2, m3;
+ uint32_t l, twMod2, twMod3, twMod4;
+
+ pCol1 = p1; /* points to real values by default */
+ pCol2 = p2;
+ pCol3 = p3;
+ pCol4 = p4;
+ pEnd1 = p2 - 1; /* points to imaginary values by default */
+ pEnd2 = p3 - 1;
+ pEnd3 = p4 - 1;
+ pEnd4 = pEnd3 + L;
+
+ tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle;
+
+ L >>= 1;
+
+ /* do four dot Fourier transform */
+
+ twMod2 = 2;
+ twMod3 = 4;
+ twMod4 = 6;
+
+ /* TOP */
+ p1ap3_0 = p1[0] + p3[0];
+ p1sp3_0 = p1[0] - p3[0];
+ p1ap3_1 = p1[1] + p3[1];
+ p1sp3_1 = p1[1] - p3[1];
+
+ /* col 2 */
+ t2[0] = p1sp3_0 + p2[1] - p4[1];
+ t2[1] = p1sp3_1 - p2[0] + p4[0];
+ /* col 3 */
+ t3[0] = p1ap3_0 - p2[0] - p4[0];
+ t3[1] = p1ap3_1 - p2[1] - p4[1];
+ /* col 4 */
+ t4[0] = p1sp3_0 - p2[1] + p4[1];
+ t4[1] = p1sp3_1 + p2[0] - p4[0];
+ /* col 1 */
+ *p1++ = p1ap3_0 + p2[0] + p4[0];
+ *p1++ = p1ap3_1 + p2[1] + p4[1];
+
+ /* Twiddle factors are ones */
+ *p2++ = t2[0];
+ *p2++ = t2[1];
+ *p3++ = t3[0];
+ *p3++ = t3[1];
+ *p4++ = t4[0];
+ *p4++ = t4[1];
+
+ tw2 += twMod2;
+ tw3 += twMod3;
+ tw4 += twMod4;
+
+ for (l = (L - 2) >> 1; l > 0; l-- )
+ {
+ /* TOP */
+ p1ap3_0 = p1[0] + p3[0];
+ p1sp3_0 = p1[0] - p3[0];
+ p1ap3_1 = p1[1] + p3[1];
+ p1sp3_1 = p1[1] - p3[1];
+ /* col 2 */
+ t2[0] = p1sp3_0 + p2[1] - p4[1];
+ t2[1] = p1sp3_1 - p2[0] + p4[0];
+ /* col 3 */
+ t3[0] = p1ap3_0 - p2[0] - p4[0];
+ t3[1] = p1ap3_1 - p2[1] - p4[1];
+ /* col 4 */
+ t4[0] = p1sp3_0 - p2[1] + p4[1];
+ t4[1] = p1sp3_1 + p2[0] - p4[0];
+ /* col 1 - top */
+ *p1++ = p1ap3_0 + p2[0] + p4[0];
+ *p1++ = p1ap3_1 + p2[1] + p4[1];
+
+ /* BOTTOM */
+ p1ap3_1 = pEnd1[-1] + pEnd3[-1];
+ p1sp3_1 = pEnd1[-1] - pEnd3[-1];
+ p1ap3_0 = pEnd1[ 0] + pEnd3[0];
+ p1sp3_0 = pEnd1[ 0] - pEnd3[0];
+ /* col 2 */
+ t2[2] = pEnd2[0] - pEnd4[0] + p1sp3_1;
+ t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1];
+ /* col 3 */
+ t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1];
+ t3[3] = p1ap3_0 - pEnd2[ 0] - pEnd4[ 0];
+ /* col 4 */
+ t4[2] = pEnd2[ 0] - pEnd4[ 0] - p1sp3_1;
+ t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0;
+ /* col 1 - Bottom */
+ *pEnd1-- = p1ap3_0 + pEnd2[ 0] + pEnd4[ 0];
+ *pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1];
+
+ /* COL 2 */
+ /* read twiddle factors */
+ twR = *tw2++;
+ twI = *tw2++;
+ /* multiply by twiddle factors */
+ /* let Z1 = a + i(b), Z2 = c + i(d) */
+ /* => Z1 * Z2 = (a*c - b*d) + i(b*c + a*d) */
+
+ /* Top */
+ m0 = t2[0] * twR;
+ m1 = t2[1] * twI;
+ m2 = t2[1] * twR;
+ m3 = t2[0] * twI;
+
+ *p2++ = m0 + m1;
+ *p2++ = m2 - m3;
+ /* use vertical symmetry col 2 */
+ /* 0.9997 - 0.0245i <==> 0.0245 - 0.9997i */
+ /* Bottom */
+ m0 = t2[3] * twI;
+ m1 = t2[2] * twR;
+ m2 = t2[2] * twI;
+ m3 = t2[3] * twR;
+
+ *pEnd2-- = m0 - m1;
+ *pEnd2-- = m2 + m3;
+
+ /* COL 3 */
+ twR = tw3[0];
+ twI = tw3[1];
+ tw3 += twMod3;
+ /* Top */
+ m0 = t3[0] * twR;
+ m1 = t3[1] * twI;
+ m2 = t3[1] * twR;
+ m3 = t3[0] * twI;
+
+ *p3++ = m0 + m1;
+ *p3++ = m2 - m3;
+ /* use vertical symmetry col 3 */
+ /* 0.9988 - 0.0491i <==> -0.9988 - 0.0491i */
+ /* Bottom */
+ m0 = -t3[3] * twR;
+ m1 = t3[2] * twI;
+ m2 = t3[2] * twR;
+ m3 = t3[3] * twI;
+
+ *pEnd3-- = m0 - m1;
+ *pEnd3-- = m3 - m2;
+
+ /* COL 4 */
+ twR = tw4[0];
+ twI = tw4[1];
+ tw4 += twMod4;
+ /* Top */
+ m0 = t4[0] * twR;
+ m1 = t4[1] * twI;
+ m2 = t4[1] * twR;
+ m3 = t4[0] * twI;
+
+ *p4++ = m0 + m1;
+ *p4++ = m2 - m3;
+ /* use vertical symmetry col 4 */
+ /* 0.9973 - 0.0736i <==> -0.0736 + 0.9973i */
+ /* Bottom */
+ m0 = t4[3] * twI;
+ m1 = t4[2] * twR;
+ m2 = t4[2] * twI;
+ m3 = t4[3] * twR;
+
+ *pEnd4-- = m0 - m1;
+ *pEnd4-- = m2 + m3;
+ }
+
+ /* MIDDLE */
+ /* Twiddle factors are */
+ /* 1.0000 0.7071-0.7071i -1.0000i -0.7071-0.7071i */
+ p1ap3_0 = p1[0] + p3[0];
+ p1sp3_0 = p1[0] - p3[0];
+ p1ap3_1 = p1[1] + p3[1];
+ p1sp3_1 = p1[1] - p3[1];
+
+ /* col 2 */
+ t2[0] = p1sp3_0 + p2[1] - p4[1];
+ t2[1] = p1sp3_1 - p2[0] + p4[0];
+ /* col 3 */
+ t3[0] = p1ap3_0 - p2[0] - p4[0];
+ t3[1] = p1ap3_1 - p2[1] - p4[1];
+ /* col 4 */
+ t4[0] = p1sp3_0 - p2[1] + p4[1];
+ t4[1] = p1sp3_1 + p2[0] - p4[0];
+ /* col 1 - Top */
+ *p1++ = p1ap3_0 + p2[0] + p4[0];
+ *p1++ = p1ap3_1 + p2[1] + p4[1];
+
+ /* COL 2 */
+ twR = tw2[0];
+ twI = tw2[1];
+
+ m0 = t2[0] * twR;
+ m1 = t2[1] * twI;
+ m2 = t2[1] * twR;
+ m3 = t2[0] * twI;
+
+ *p2++ = m0 + m1;
+ *p2++ = m2 - m3;
+ /* COL 3 */
+ twR = tw3[0];
+ twI = tw3[1];
+
+ m0 = t3[0] * twR;
+ m1 = t3[1] * twI;
+ m2 = t3[1] * twR;
+ m3 = t3[0] * twI;
+
+ *p3++ = m0 + m1;
+ *p3++ = m2 - m3;
+ /* COL 4 */
+ twR = tw4[0];
+ twI = tw4[1];
+
+ m0 = t4[0] * twR;
+ m1 = t4[1] * twI;
+ m2 = t4[1] * twR;
+ m3 = t4[0] * twI;
+
+ *p4++ = m0 + m1;
+ *p4++ = m2 - m3;
+
+ /* first col */
+ arm_radix8_butterfly_f32 (pCol1, L, (float32_t *) S->pTwiddle, 4U);
+
+ /* second col */
+ arm_radix8_butterfly_f32 (pCol2, L, (float32_t *) S->pTwiddle, 4U);
+
+ /* third col */
+ arm_radix8_butterfly_f32 (pCol3, L, (float32_t *) S->pTwiddle, 4U);
+
+ /* fourth col */
+ arm_radix8_butterfly_f32 (pCol4, L, (float32_t *) S->pTwiddle, 4U);
+}
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Processing function for the floating-point complex FFT.
+ @param[in] S points to an instance of the floating-point CFFT structure
+ @param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+ @param[in] ifftFlag flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return none
+ */
+
+void arm_cfft_f32(
+ const arm_cfft_instance_f32 * S,
+ float32_t * p1,
+ uint8_t ifftFlag,
+ uint8_t bitReverseFlag)
+{
+ uint32_t L = S->fftLen, l;
+ float32_t invL, * pSrc;
+
+ if (ifftFlag == 1U)
+ {
+ /* Conjugate input data */
+ pSrc = p1 + 1;
+ for (l = 0; l < L; l++)
+ {
+ *pSrc = -*pSrc;
+ pSrc += 2;
+ }
+ }
+
+ switch (L)
+ {
+ case 16:
+ case 128:
+ case 1024:
+ arm_cfft_radix8by2_f32 ( (arm_cfft_instance_f32 *) S, p1);
+ break;
+ case 32:
+ case 256:
+ case 2048:
+ arm_cfft_radix8by4_f32 ( (arm_cfft_instance_f32 *) S, p1);
+ break;
+ case 64:
+ case 512:
+ case 4096:
+ arm_radix8_butterfly_f32 ( p1, L, (float32_t *) S->pTwiddle, 1);
+ break;
+ }
+
+ if ( bitReverseFlag )
+ arm_bitreversal_32 ((uint32_t*) p1, S->bitRevLength, S->pBitRevTable);
+
+ if (ifftFlag == 1U)
+ {
+ invL = 1.0f / (float32_t)L;
+
+ /* Conjugate and scale output data */
+ pSrc = p1;
+ for (l= 0; l < L; l++)
+ {
+ *pSrc++ *= invL ;
+ *pSrc = -(*pSrc) * invL;
+ pSrc++;
+ }
+ }
+}
+
+/**
+ @} end of ComplexFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c index 74a6e7a..a47dd02 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c @@ -1,893 +1,332 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_q15.c - * Description: Combined Radix Decimation in Q15 Frequency CFFT processing function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_vec_fft.h" - - -static void _arm_radix4_butterfly_q15_mve( - const arm_cfft_instance_q15 * S, - q15_t *pSrc, - uint32_t fftLen) -{ - q15x8_t vecTmp0, vecTmp1; - q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1; - q15x8_t vecA, vecB, vecC, vecD; - uint32_t blkCnt; - uint32_t n1, n2; - uint32_t stage = 0; - int32_t iter = 1; - static const int32_t strides[4] = { - (0 - 16) * (int32_t)sizeof(q15_t *), (4 - 16) * (int32_t)sizeof(q15_t *), - (8 - 16) * (int32_t)sizeof(q15_t *), (12 - 16) * (int32_t)sizeof(q15_t *) - }; - - /* - * Process first stages - * Each stage in middle stages provides two down scaling of the input - */ - n2 = fftLen; - n1 = n2; - n2 >>= 2u; - - for (int k = fftLen / 4u; k > 1; k >>= 2u) - { - q15_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - q15_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - - q15_t * pBase = pSrc; - for (int i = 0; i < iter; i++) - { - q15_t *inA = pBase; - q15_t *inB = inA + n2 * CMPLX_DIM; - q15_t *inC = inB + n2 * CMPLX_DIM; - q15_t *inD = inC + n2 * CMPLX_DIM; - q15_t const *pW1 = p_rearranged_twiddle_tab_stride1; - q15_t const *pW2 = p_rearranged_twiddle_tab_stride2; - q15_t const *pW3 = p_rearranged_twiddle_tab_stride3; - q15x8_t vecW; - - blkCnt = n2 / 4; - /* - * load 4 x q15 complex pair - */ - vecA = vldrhq_s16(inA); - vecC = vldrhq_s16(inC); - while (blkCnt > 0U) - { - vecB = vldrhq_s16(inB); - vecD = vldrhq_s16(inD); - - vecSum0 = vhaddq(vecA, vecC); - vecDiff0 = vhsubq(vecA, vecC); - - vecSum1 = vhaddq(vecB, vecD); - vecDiff1 = vhsubq(vecB, vecD); - /* - * [ 1 1 1 1 ] * [ A B C D ]' .* 1 - */ - vecTmp0 = vhaddq(vecSum0, vecSum1); - vst1q(inA, vecTmp0); - inA += 8; - /* - * [ 1 -1 1 -1 ] * [ A B C D ]' - */ - vecTmp0 = vhsubq(vecSum0, vecSum1); - /* - * [ 1 -1 1 -1 ] * [ A B C D ]'.* W2 - */ - vecW = vld1q(pW2); - pW2 += 8; - vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t); - - vst1q(inB, vecTmp1); - inB += 8; - /* - * [ 1 -i -1 +i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 -i -1 +i ] * [ A B C D ]'.* W1 - */ - vecW = vld1q(pW1); - pW1 += 8; - vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t); - vst1q(inC, vecTmp1); - inC += 8; - - /* - * [ 1 +i -1 -i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 +i -1 -i ] * [ A B C D ]'.* W3 - */ - vecW = vld1q(pW3); - pW3 += 8; - vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t); - vst1q(inD, vecTmp1); - inD += 8; - - vecA = vldrhq_s16(inA); - vecC = vldrhq_s16(inC); - - blkCnt--; - } - pBase += CMPLX_DIM * n1; - } - n1 = n2; - n2 >>= 2u; - iter = iter << 2; - stage++; - } - - /* - * start of Last stage process - */ - uint32x4_t vecScGathAddr = vld1q_u32 ((uint32_t*)strides); - vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; - - /* - * load scheduling - */ - vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); - vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8); - - blkCnt = (fftLen >> 4); - while (blkCnt > 0U) - { - vecSum0 = vhaddq(vecA, vecC); - vecDiff0 = vhsubq(vecA, vecC); - - vecB = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 4); - vecD = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 12); - - vecSum1 = vhaddq(vecB, vecD); - vecDiff1 = vhsubq(vecB, vecD); - /* - * pre-load for next iteration - */ - vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); - vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8); - - vecTmp0 = vhaddq(vecSum0, vecSum1); - vstrwq_scatter_base_s32(vecScGathAddr, -64, (int32x4_t) vecTmp0); - - vecTmp0 = vhsubq(vecSum0, vecSum1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (int32x4_t) vecTmp0); - - vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (int32x4_t) vecTmp0); - - vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (int32x4_t) vecTmp0); - - blkCnt--; - } - -} - -static void arm_cfft_radix4by2_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pSrc, uint32_t fftLen) -{ - uint32_t n2; - q15_t *pIn0; - q15_t *pIn1; - const q15_t *pCoef = S->pTwiddle; - uint32_t blkCnt; - q15x8_t vecIn0, vecIn1, vecSum, vecDiff; - q15x8_t vecCmplxTmp, vecTw; - q15_t const *pCoefVec; - - n2 = fftLen >> 1; - - pIn0 = pSrc; - pIn1 = pSrc + fftLen; - pCoefVec = pCoef; - - blkCnt = n2 / 4; - - while (blkCnt > 0U) - { - vecIn0 = *(q15x8_t *) pIn0; - vecIn1 = *(q15x8_t *) pIn1; - - vecIn0 = vecIn0 >> 1; - vecIn1 = vecIn1 >> 1; - vecSum = vhaddq(vecIn0, vecIn1); - vst1q(pIn0, vecSum); - pIn0 += 8; - - vecTw = vld1q(pCoefVec); - pCoefVec += 8; - - vecDiff = vhsubq(vecIn0, vecIn1); - vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw, q15x8_t); - vst1q(pIn1, vecCmplxTmp); - pIn1 += 8; - - blkCnt--; - } - - _arm_radix4_butterfly_q15_mve(S, pSrc, n2); - - _arm_radix4_butterfly_q15_mve(S, pSrc + fftLen, n2); - - - pIn0 = pSrc; - blkCnt = (fftLen << 1) >> 3; - while (blkCnt > 0U) - { - vecIn0 = *(q15x8_t *) pIn0; - vecIn0 = vecIn0 << 1; - vst1q(pIn0, vecIn0); - pIn0 += 8; - blkCnt--; - } - /* - * tail - * (will be merged thru tail predication) - */ - blkCnt = (fftLen << 1) & 7; - if (blkCnt > 0U) - { - mve_pred16_t p0 = vctp16q(blkCnt); - - vecIn0 = *(q15x8_t *) pIn0; - vecIn0 = vecIn0 << 1; - vstrhq_p(pIn0, vecIn0, p0); - } -} - -static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S,q15_t *pSrc, uint32_t fftLen) -{ - q15x8_t vecTmp0, vecTmp1; - q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1; - q15x8_t vecA, vecB, vecC, vecD; - uint32_t blkCnt; - uint32_t n1, n2; - uint32_t stage = 0; - int32_t iter = 1; - static const int32_t strides[4] = { - (0 - 16) * (int32_t)sizeof(q15_t *), (4 - 16) * (int32_t)sizeof(q15_t *), - (8 - 16) * (int32_t)sizeof(q15_t *), (12 - 16) * (int32_t)sizeof(q15_t *) - }; - - - /* - * Process first stages - * Each stage in middle stages provides two down scaling of the input - */ - n2 = fftLen; - n1 = n2; - n2 >>= 2u; - - for (int k = fftLen / 4u; k > 1; k >>= 2u) - { - q15_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - q15_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - - q15_t * pBase = pSrc; - for (int i = 0; i < iter; i++) - { - q15_t *inA = pBase; - q15_t *inB = inA + n2 * CMPLX_DIM; - q15_t *inC = inB + n2 * CMPLX_DIM; - q15_t *inD = inC + n2 * CMPLX_DIM; - q15_t const *pW1 = p_rearranged_twiddle_tab_stride1; - q15_t const *pW2 = p_rearranged_twiddle_tab_stride2; - q15_t const *pW3 = p_rearranged_twiddle_tab_stride3; - q15x8_t vecW; - - - blkCnt = n2 / 4; - /* - * load 4 x q15 complex pair - */ - vecA = vldrhq_s16(inA); - vecC = vldrhq_s16(inC); - while (blkCnt > 0U) - { - vecB = vldrhq_s16(inB); - vecD = vldrhq_s16(inD); - - vecSum0 = vhaddq(vecA, vecC); - vecDiff0 = vhsubq(vecA, vecC); - - vecSum1 = vhaddq(vecB, vecD); - vecDiff1 = vhsubq(vecB, vecD); - /* - * [ 1 1 1 1 ] * [ A B C D ]' .* 1 - */ - vecTmp0 = vhaddq(vecSum0, vecSum1); - vst1q(inA, vecTmp0); - inA += 8; - /* - * [ 1 -1 1 -1 ] * [ A B C D ]' - */ - vecTmp0 = vhsubq(vecSum0, vecSum1); - /* - * [ 1 -1 1 -1 ] * [ A B C D ]'.* W2 - */ - vecW = vld1q(pW2); - pW2 += 8; - vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t); - - vst1q(inB, vecTmp1); - inB += 8; - /* - * [ 1 -i -1 +i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 -i -1 +i ] * [ A B C D ]'.* W1 - */ - vecW = vld1q(pW1); - pW1 += 8; - vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t); - vst1q(inC, vecTmp1); - inC += 8; - /* - * [ 1 +i -1 -i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 +i -1 -i ] * [ A B C D ]'.* W3 - */ - vecW = vld1q(pW3); - pW3 += 8; - vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q15x8_t); - vst1q(inD, vecTmp1); - inD += 8; - - vecA = vldrhq_s16(inA); - vecC = vldrhq_s16(inC); - - blkCnt--; - } - pBase += CMPLX_DIM * n1; - } - n1 = n2; - n2 >>= 2u; - iter = iter << 2; - stage++; - } - - /* - * start of Last stage process - */ - uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides); - vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; - - /* - * load scheduling - */ - vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); - vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8); - - blkCnt = (fftLen >> 4); - while (blkCnt > 0U) - { - vecSum0 = vhaddq(vecA, vecC); - vecDiff0 = vhsubq(vecA, vecC); - - vecB = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 4); - vecD = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 12); - - vecSum1 = vhaddq(vecB, vecD); - vecDiff1 = vhsubq(vecB, vecD); - /* - * pre-load for next iteration - */ - vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); - vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8); - - vecTmp0 = vhaddq(vecSum0, vecSum1); - vstrwq_scatter_base_s32(vecScGathAddr, -64, (int32x4_t) vecTmp0); - - vecTmp0 = vhsubq(vecSum0, vecSum1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (int32x4_t) vecTmp0); - - vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (int32x4_t) vecTmp0); - - vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (int32x4_t) vecTmp0); - - blkCnt--; - } -} - -static void arm_cfft_radix4by2_inverse_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pSrc, uint32_t fftLen) -{ - uint32_t n2; - q15_t *pIn0; - q15_t *pIn1; - const q15_t *pCoef = S->pTwiddle; - - uint32_t blkCnt; - q15x8_t vecIn0, vecIn1, vecSum, vecDiff; - q15x8_t vecCmplxTmp, vecTw; - q15_t const *pCoefVec; - - n2 = fftLen >> 1; - - pIn0 = pSrc; - pIn1 = pSrc + fftLen; - pCoefVec = pCoef; - - blkCnt = n2 / 4; - - while (blkCnt > 0U) - { - vecIn0 = *(q15x8_t *) pIn0; - vecIn1 = *(q15x8_t *) pIn1; - - vecIn0 = vecIn0 >> 1; - vecIn1 = vecIn1 >> 1; - vecSum = vhaddq(vecIn0, vecIn1); - vst1q(pIn0, vecSum); - pIn0 += 8; - - vecTw = vld1q(pCoefVec); - pCoefVec += 8; - - vecDiff = vhsubq(vecIn0, vecIn1); - vecCmplxTmp = vqrdmlsdhq(vuninitializedq_s16() , vecDiff, vecTw); - vecCmplxTmp = vqrdmladhxq(vecCmplxTmp, vecDiff, vecTw); - vst1q(pIn1, vecCmplxTmp); - pIn1 += 8; - - blkCnt--; - } - - - _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, n2); - - _arm_radix4_butterfly_inverse_q15_mve(S, pSrc + fftLen, n2); - - pIn0 = pSrc; - blkCnt = (fftLen << 1) >> 3; - while (blkCnt > 0U) - { - vecIn0 = *(q15x8_t *) pIn0; - vecIn0 = vecIn0 << 1; - vst1q(pIn0, vecIn0); - pIn0 += 8; - blkCnt--; - } - /* - * tail - * (will be merged thru tail predication) - */ - blkCnt = (fftLen << 1) & 7; - while (blkCnt > 0U) - { - mve_pred16_t p0 = vctp16q(blkCnt); - - vecIn0 = *(q15x8_t *) pIn0; - vecIn0 = vecIn0 << 1; - vstrhq_p(pIn0, vecIn0, p0); - } -} - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Processing function for Q15 complex FFT. - @param[in] S points to an instance of Q15 CFFT structure - @param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return none - */ -void arm_cfft_q15( - const arm_cfft_instance_q15 * S, - q15_t * pSrc, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - uint32_t fftLen = S->fftLen; - - if (ifftFlag == 1U) { - - switch (fftLen) { - case 16: - case 64: - case 256: - case 1024: - case 4096: - _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, fftLen); - break; - - case 32: - case 128: - case 512: - case 2048: - arm_cfft_radix4by2_inverse_q15_mve(S, pSrc, fftLen); - break; - } - } else { - switch (fftLen) { - case 16: - case 64: - case 256: - case 1024: - case 4096: - _arm_radix4_butterfly_q15_mve(S, pSrc, fftLen); - break; - - case 32: - case 128: - case 512: - case 2048: - arm_cfft_radix4by2_q15_mve(S, pSrc, fftLen); - break; - } - } - - - if (bitReverseFlag) - { - - arm_bitreversal_16_inpl_mve((uint16_t*)pSrc, S->bitRevLength, S->pBitRevTable); - - } -} - -#else - -extern void arm_radix4_butterfly_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pCoef, - uint32_t twidCoefModifier); - -extern void arm_radix4_butterfly_inverse_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pCoef, - uint32_t twidCoefModifier); - -extern void arm_bitreversal_16( - uint16_t * pSrc, - const uint16_t bitRevLen, - const uint16_t * pBitRevTable); - -void arm_cfft_radix4by2_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pCoef); - -void arm_cfft_radix4by2_inverse_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pCoef); - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Processing function for Q15 complex FFT. - @param[in] S points to an instance of Q15 CFFT structure - @param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return none - */ - -void arm_cfft_q15( - const arm_cfft_instance_q15 * S, - q15_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - uint32_t L = S->fftLen; - - if (ifftFlag == 1U) - { - switch (L) - { - case 16: - case 64: - case 256: - case 1024: - case 4096: - arm_radix4_butterfly_inverse_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 ); - break; - - case 32: - case 128: - case 512: - case 2048: - arm_cfft_radix4by2_inverse_q15 ( p1, L, S->pTwiddle ); - break; - } - } - else - { - switch (L) - { - case 16: - case 64: - case 256: - case 1024: - case 4096: - arm_radix4_butterfly_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 ); - break; - - case 32: - case 128: - case 512: - case 2048: - arm_cfft_radix4by2_q15 ( p1, L, S->pTwiddle ); - break; - } - } - - if ( bitReverseFlag ) - arm_bitreversal_16 ((uint16_t*) p1, S->bitRevLength, S->pBitRevTable); -} - -/** - @} end of ComplexFFT group - */ - -void arm_cfft_radix4by2_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pCoef) -{ - uint32_t i; - uint32_t n2; - q15_t p0, p1, p2, p3; -#if defined (ARM_MATH_DSP) - q31_t T, S, R; - q31_t coeff, out1, out2; - const q15_t *pC = pCoef; - q15_t *pSi = pSrc; - q15_t *pSl = pSrc + fftLen; -#else - uint32_t l; - q15_t xt, yt, cosVal, sinVal; -#endif - - n2 = fftLen >> 1U; - -#if defined (ARM_MATH_DSP) - - for (i = n2; i > 0; i--) - { - coeff = read_q15x2_ia (&pC); - - T = read_q15x2 (pSi); - T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */ - - S = read_q15x2 (pSl); - S = __SHADD16(S, 0); /* this is just a SIMD arithmetic shift right by 1 */ - - R = __QSUB16(T, S); - - write_q15x2_ia (&pSi, __SHADD16(T, S)); - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUAD(coeff, R) >> 16U; - out2 = __SMUSDX(coeff, R); -#else - out1 = __SMUSDX(R, coeff) >> 16U; - out2 = __SMUAD(coeff, R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - write_q15x2_ia (&pSl, (q31_t)__PKHBT( out1, out2, 0 ) ); - } - -#else /* #if defined (ARM_MATH_DSP) */ - - for (i = 0; i < n2; i++) - { - cosVal = pCoef[2 * i]; - sinVal = pCoef[2 * i + 1]; - - l = i + n2; - - xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U); - pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U; - - yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U); - pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U; - - pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16U)) + - ((int16_t) (((q31_t) yt * sinVal) >> 16U)) ); - - pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16U)) - - ((int16_t) (((q31_t) xt * sinVal) >> 16U)) ); - } - -#endif /* #if defined (ARM_MATH_DSP) */ - - /* first col */ - arm_radix4_butterfly_q15( pSrc, n2, (q15_t*)pCoef, 2U); - - /* second col */ - arm_radix4_butterfly_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U); - - n2 = fftLen >> 1U; - for (i = 0; i < n2; i++) - { - p0 = pSrc[4 * i + 0]; - p1 = pSrc[4 * i + 1]; - p2 = pSrc[4 * i + 2]; - p3 = pSrc[4 * i + 3]; - - p0 <<= 1U; - p1 <<= 1U; - p2 <<= 1U; - p3 <<= 1U; - - pSrc[4 * i + 0] = p0; - pSrc[4 * i + 1] = p1; - pSrc[4 * i + 2] = p2; - pSrc[4 * i + 3] = p3; - } - -} - -void arm_cfft_radix4by2_inverse_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pCoef) -{ - uint32_t i; - uint32_t n2; - q15_t p0, p1, p2, p3; -#if defined (ARM_MATH_DSP) - q31_t T, S, R; - q31_t coeff, out1, out2; - const q15_t *pC = pCoef; - q15_t *pSi = pSrc; - q15_t *pSl = pSrc + fftLen; -#else - uint32_t l; - q15_t xt, yt, cosVal, sinVal; -#endif - - n2 = fftLen >> 1U; - -#if defined (ARM_MATH_DSP) - - for (i = n2; i > 0; i--) - { - coeff = read_q15x2_ia (&pC); - - T = read_q15x2 (pSi); - T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */ - - S = read_q15x2 (pSl); - S = __SHADD16(S, 0); /* this is just a SIMD arithmetic shift right by 1 */ - - R = __QSUB16(T, S); - - write_q15x2_ia (&pSi, __SHADD16(T, S)); - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUSD(coeff, R) >> 16U; - out2 = __SMUADX(coeff, R); -#else - out1 = __SMUADX(R, coeff) >> 16U; - out2 = __SMUSD(__QSUB(0, coeff), R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - write_q15x2_ia (&pSl, (q31_t)__PKHBT( out1, out2, 0 )); - } - -#else /* #if defined (ARM_MATH_DSP) */ - - for (i = 0; i < n2; i++) - { - cosVal = pCoef[2 * i]; - sinVal = pCoef[2 * i + 1]; - - l = i + n2; - - xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U); - pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U; - - yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U); - pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U; - - pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16U)) - - ((int16_t) (((q31_t) yt * sinVal) >> 16U)) ); - - pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16U)) + - ((int16_t) (((q31_t) xt * sinVal) >> 16U)) ); - } - -#endif /* #if defined (ARM_MATH_DSP) */ - - /* first col */ - arm_radix4_butterfly_inverse_q15( pSrc, n2, (q15_t*)pCoef, 2U); - - /* second col */ - arm_radix4_butterfly_inverse_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U); - - n2 = fftLen >> 1U; - for (i = 0; i < n2; i++) - { - p0 = pSrc[4 * i + 0]; - p1 = pSrc[4 * i + 1]; - p2 = pSrc[4 * i + 2]; - p3 = pSrc[4 * i + 3]; - - p0 <<= 1U; - p1 <<= 1U; - p2 <<= 1U; - p3 <<= 1U; - - pSrc[4 * i + 0] = p0; - pSrc[4 * i + 1] = p1; - pSrc[4 * i + 2] = p2; - pSrc[4 * i + 3] = p3; - } -} - -#endif /* defined(ARM_MATH_MVEI) */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_q15.c
+ * Description: Combined Radix Decimation in Q15 Frequency CFFT processing function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+extern void arm_radix4_butterfly_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pCoef,
+ uint32_t twidCoefModifier);
+
+extern void arm_radix4_butterfly_inverse_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pCoef,
+ uint32_t twidCoefModifier);
+
+extern void arm_bitreversal_16(
+ uint16_t * pSrc,
+ const uint16_t bitRevLen,
+ const uint16_t * pBitRevTable);
+
+void arm_cfft_radix4by2_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pCoef);
+
+void arm_cfft_radix4by2_inverse_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pCoef);
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Processing function for Q15 complex FFT.
+ @param[in] S points to an instance of Q15 CFFT structure
+ @param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+ @param[in] ifftFlag flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return none
+ */
+
+void arm_cfft_q15(
+ const arm_cfft_instance_q15 * S,
+ q15_t * p1,
+ uint8_t ifftFlag,
+ uint8_t bitReverseFlag)
+{
+ uint32_t L = S->fftLen;
+
+ if (ifftFlag == 1U)
+ {
+ switch (L)
+ {
+ case 16:
+ case 64:
+ case 256:
+ case 1024:
+ case 4096:
+ arm_radix4_butterfly_inverse_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 );
+ break;
+
+ case 32:
+ case 128:
+ case 512:
+ case 2048:
+ arm_cfft_radix4by2_inverse_q15 ( p1, L, S->pTwiddle );
+ break;
+ }
+ }
+ else
+ {
+ switch (L)
+ {
+ case 16:
+ case 64:
+ case 256:
+ case 1024:
+ case 4096:
+ arm_radix4_butterfly_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 );
+ break;
+
+ case 32:
+ case 128:
+ case 512:
+ case 2048:
+ arm_cfft_radix4by2_q15 ( p1, L, S->pTwiddle );
+ break;
+ }
+ }
+
+ if ( bitReverseFlag )
+ arm_bitreversal_16 ((uint16_t*) p1, S->bitRevLength, S->pBitRevTable);
+}
+
+/**
+ @} end of ComplexFFT group
+ */
+
+void arm_cfft_radix4by2_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pCoef)
+{
+ uint32_t i;
+ uint32_t n2;
+ q15_t p0, p1, p2, p3;
+#if defined (ARM_MATH_DSP)
+ q31_t T, S, R;
+ q31_t coeff, out1, out2;
+ const q15_t *pC = pCoef;
+ q15_t *pSi = pSrc;
+ q15_t *pSl = pSrc + fftLen;
+#else
+ uint32_t l;
+ q15_t xt, yt, cosVal, sinVal;
+#endif
+
+ n2 = fftLen >> 1U;
+
+#if defined (ARM_MATH_DSP)
+
+ for (i = n2; i > 0; i--)
+ {
+ coeff = read_q15x2_ia ((q15_t **) &pC);
+
+ T = read_q15x2 (pSi);
+ T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
+
+ S = read_q15x2 (pSl);
+ S = __SHADD16(S, 0); /* this is just a SIMD arithmetic shift right by 1 */
+
+ R = __QSUB16(T, S);
+
+ write_q15x2_ia (&pSi, __SHADD16(T, S));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUAD(coeff, R) >> 16U;
+ out2 = __SMUSDX(coeff, R);
+#else
+ out1 = __SMUSDX(R, coeff) >> 16U;
+ out2 = __SMUAD(coeff, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ write_q15x2_ia (&pSl, (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+ }
+
+#else /* #if defined (ARM_MATH_DSP) */
+
+ for (i = 0; i < n2; i++)
+ {
+ cosVal = pCoef[2 * i];
+ sinVal = pCoef[2 * i + 1];
+
+ l = i + n2;
+
+ xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
+ pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
+
+ yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
+ pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
+
+ pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16U)) +
+ ((int16_t) (((q31_t) yt * sinVal) >> 16U)) );
+
+ pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16U)) -
+ ((int16_t) (((q31_t) xt * sinVal) >> 16U)) );
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* first col */
+ arm_radix4_butterfly_q15( pSrc, n2, (q15_t*)pCoef, 2U);
+
+ /* second col */
+ arm_radix4_butterfly_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U);
+
+ n2 = fftLen >> 1U;
+ for (i = 0; i < n2; i++)
+ {
+ p0 = pSrc[4 * i + 0];
+ p1 = pSrc[4 * i + 1];
+ p2 = pSrc[4 * i + 2];
+ p3 = pSrc[4 * i + 3];
+
+ p0 <<= 1U;
+ p1 <<= 1U;
+ p2 <<= 1U;
+ p3 <<= 1U;
+
+ pSrc[4 * i + 0] = p0;
+ pSrc[4 * i + 1] = p1;
+ pSrc[4 * i + 2] = p2;
+ pSrc[4 * i + 3] = p3;
+ }
+
+}
+
+void arm_cfft_radix4by2_inverse_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pCoef)
+{
+ uint32_t i;
+ uint32_t n2;
+ q15_t p0, p1, p2, p3;
+#if defined (ARM_MATH_DSP)
+ q31_t T, S, R;
+ q31_t coeff, out1, out2;
+ const q15_t *pC = pCoef;
+ q15_t *pSi = pSrc;
+ q15_t *pSl = pSrc + fftLen;
+#else
+ uint32_t l;
+ q15_t xt, yt, cosVal, sinVal;
+#endif
+
+ n2 = fftLen >> 1U;
+
+#if defined (ARM_MATH_DSP)
+
+ for (i = n2; i > 0; i--)
+ {
+ coeff = read_q15x2_ia ((q15_t **) &pC);
+
+ T = read_q15x2 (pSi);
+ T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
+
+ S = read_q15x2 (pSl);
+ S = __SHADD16(S, 0); /* this is just a SIMD arithmetic shift right by 1 */
+
+ R = __QSUB16(T, S);
+
+ write_q15x2_ia (&pSi, __SHADD16(T, S));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUSD(coeff, R) >> 16U;
+ out2 = __SMUADX(coeff, R);
+#else
+ out1 = __SMUADX(R, coeff) >> 16U;
+ out2 = __SMUSD(__QSUB(0, coeff), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ write_q15x2_ia (&pSl, (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+ }
+
+#else /* #if defined (ARM_MATH_DSP) */
+
+ for (i = 0; i < n2; i++)
+ {
+ cosVal = pCoef[2 * i];
+ sinVal = pCoef[2 * i + 1];
+
+ l = i + n2;
+
+ xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
+ pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
+
+ yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
+ pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
+
+ pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16U)) -
+ ((int16_t) (((q31_t) yt * sinVal) >> 16U)) );
+
+ pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16U)) +
+ ((int16_t) (((q31_t) xt * sinVal) >> 16U)) );
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* first col */
+ arm_radix4_butterfly_inverse_q15( pSrc, n2, (q15_t*)pCoef, 2U);
+
+ /* second col */
+ arm_radix4_butterfly_inverse_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U);
+
+ n2 = fftLen >> 1U;
+ for (i = 0; i < n2; i++)
+ {
+ p0 = pSrc[4 * i + 0];
+ p1 = pSrc[4 * i + 1];
+ p2 = pSrc[4 * i + 2];
+ p3 = pSrc[4 * i + 3];
+
+ p0 <<= 1U;
+ p1 <<= 1U;
+ p2 <<= 1U;
+ p3 <<= 1U;
+
+ pSrc[4 * i + 0] = p0;
+ pSrc[4 * i + 1] = p1;
+ pSrc[4 * i + 2] = p2;
+ pSrc[4 * i + 3] = p3;
+ }
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q31.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q31.c index 78ce505..785942f 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q31.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q31.c @@ -1,847 +1,254 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_q31.c - * Description: Combined Radix Decimation in Frequency CFFT fixed point processing function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - - - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_vec_fft.h" - - -static void _arm_radix4_butterfly_q31_mve( - const arm_cfft_instance_q31 * S, - q31_t *pSrc, - uint32_t fftLen) -{ - q31x4_t vecTmp0, vecTmp1; - q31x4_t vecSum0, vecDiff0, vecSum1, vecDiff1; - q31x4_t vecA, vecB, vecC, vecD; - uint32_t blkCnt; - uint32_t n1, n2; - uint32_t stage = 0; - int32_t iter = 1; - static const int32_t strides[4] = { - (0 - 16) * (int32_t)sizeof(q31_t *), (1 - 16) * (int32_t)sizeof(q31_t *), - (8 - 16) * (int32_t)sizeof(q31_t *), (9 - 16) * (int32_t)sizeof(q31_t *) - }; - - - /* - * Process first stages - * Each stage in middle stages provides two down scaling of the input - */ - n2 = fftLen; - n1 = n2; - n2 >>= 2u; - - for (int k = fftLen / 4u; k > 1; k >>= 2u) - { - q31_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - q31_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - - q31_t * pBase = pSrc; - for (int i = 0; i < iter; i++) - { - q31_t *inA = pBase; - q31_t *inB = inA + n2 * CMPLX_DIM; - q31_t *inC = inB + n2 * CMPLX_DIM; - q31_t *inD = inC + n2 * CMPLX_DIM; - q31_t const *pW1 = p_rearranged_twiddle_tab_stride1; - q31_t const *pW2 = p_rearranged_twiddle_tab_stride2; - q31_t const *pW3 = p_rearranged_twiddle_tab_stride3; - q31x4_t vecW; - - - blkCnt = n2 / 2; - /* - * load 2 x q31 complex pair - */ - vecA = vldrwq_s32(inA); - vecC = vldrwq_s32(inC); - while (blkCnt > 0U) - { - vecB = vldrwq_s32(inB); - vecD = vldrwq_s32(inD); - - vecSum0 = vhaddq(vecA, vecC); - vecDiff0 = vhsubq(vecA, vecC); - - vecSum1 = vhaddq(vecB, vecD); - vecDiff1 = vhsubq(vecB, vecD); - /* - * [ 1 1 1 1 ] * [ A B C D ]' .* 1 - */ - vecTmp0 = vhaddq(vecSum0, vecSum1); - vst1q(inA, vecTmp0); - inA += 4; - /* - * [ 1 -1 1 -1 ] * [ A B C D ]' - */ - vecTmp0 = vhsubq(vecSum0, vecSum1); - /* - * [ 1 -1 1 -1 ] * [ A B C D ]'.* W2 - */ - vecW = vld1q(pW2); - pW2 += 4; - vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t); - - vst1q(inB, vecTmp1); - inB += 4; - /* - * [ 1 -i -1 +i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 -i -1 +i ] * [ A B C D ]'.* W1 - */ - vecW = vld1q(pW1); - pW1 += 4; - vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t); - vst1q(inC, vecTmp1); - inC += 4; - /* - * [ 1 +i -1 -i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 +i -1 -i ] * [ A B C D ]'.* W3 - */ - vecW = vld1q(pW3); - pW3 += 4; - vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t); - vst1q(inD, vecTmp1); - inD += 4; - - vecA = vldrwq_s32(inA); - vecC = vldrwq_s32(inC); - - blkCnt--; - } - pBase += CMPLX_DIM * n1; - } - n1 = n2; - n2 >>= 2u; - iter = iter << 2; - stage++; - } - - /* - * End of 1st stages process - * data is in 11.21(q21) format for the 1024 point as there are 3 middle stages - * data is in 9.23(q23) format for the 256 point as there are 2 middle stages - * data is in 7.25(q25) format for the 64 point as there are 1 middle stage - * data is in 5.27(q27) format for the 16 point as there are no middle stages - */ - - /* - * start of Last stage process - */ - uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides); - vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; - - /* - * load scheduling - */ - vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); - vecC = vldrwq_gather_base_s32(vecScGathAddr, 16); - - blkCnt = (fftLen >> 3); - while (blkCnt > 0U) - { - vecSum0 = vhaddq(vecA, vecC); - vecDiff0 = vhsubq(vecA, vecC); - - vecB = vldrwq_gather_base_s32(vecScGathAddr, 8); - vecD = vldrwq_gather_base_s32(vecScGathAddr, 24); - - vecSum1 = vhaddq(vecB, vecD); - vecDiff1 = vhsubq(vecB, vecD); - /* - * pre-load for next iteration - */ - vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); - vecC = vldrwq_gather_base_s32(vecScGathAddr, 16); - - vecTmp0 = vhaddq(vecSum0, vecSum1); - vstrwq_scatter_base_s32(vecScGathAddr, -64, vecTmp0); - - vecTmp0 = vhsubq(vecSum0, vecSum1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, vecTmp0); - - vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 16, vecTmp0); - - vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 24, vecTmp0); - - blkCnt--; - } - - /* - * output is in 11.21(q21) format for the 1024 point - * output is in 9.23(q23) format for the 256 point - * output is in 7.25(q25) format for the 64 point - * output is in 5.27(q27) format for the 16 point - */ -} - - -static void arm_cfft_radix4by2_q31_mve(const arm_cfft_instance_q31 *S, q31_t *pSrc, uint32_t fftLen) -{ - uint32_t n2; - q31_t *pIn0; - q31_t *pIn1; - const q31_t *pCoef = S->pTwiddle; - uint32_t blkCnt; - q31x4_t vecIn0, vecIn1, vecSum, vecDiff; - q31x4_t vecCmplxTmp, vecTw; - - n2 = fftLen >> 1; - pIn0 = pSrc; - pIn1 = pSrc + fftLen; - - blkCnt = n2 / 2; - - while (blkCnt > 0U) - { - vecIn0 = vld1q_s32(pIn0); - vecIn1 = vld1q_s32(pIn1); - - vecIn0 = vecIn0 >> 1; - vecIn1 = vecIn1 >> 1; - vecSum = vhaddq(vecIn0, vecIn1); - vst1q(pIn0, vecSum); - pIn0 += 4; - - vecTw = vld1q_s32(pCoef); - pCoef += 4; - vecDiff = vhsubq(vecIn0, vecIn1); - - vecCmplxTmp = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw, q31x4_t); - vst1q(pIn1, vecCmplxTmp); - pIn1 += 4; - - blkCnt--; - } - - _arm_radix4_butterfly_q31_mve(S, pSrc, n2); - - _arm_radix4_butterfly_q31_mve(S, pSrc + fftLen, n2); - - pIn0 = pSrc; - blkCnt = (fftLen << 1) >> 2; - while (blkCnt > 0U) - { - vecIn0 = vld1q_s32(pIn0); - vecIn0 = vecIn0 << 1; - vst1q(pIn0, vecIn0); - pIn0 += 4; - blkCnt--; - } - /* - * tail - * (will be merged thru tail predication) - */ - blkCnt = (fftLen << 1) & 3; - if (blkCnt > 0U) - { - mve_pred16_t p0 = vctp32q(blkCnt); - - vecIn0 = vld1q_s32(pIn0); - vecIn0 = vecIn0 << 1; - vstrwq_p(pIn0, vecIn0, p0); - } - -} - -static void _arm_radix4_butterfly_inverse_q31_mve( - const arm_cfft_instance_q31 *S, - q31_t *pSrc, - uint32_t fftLen) -{ - q31x4_t vecTmp0, vecTmp1; - q31x4_t vecSum0, vecDiff0, vecSum1, vecDiff1; - q31x4_t vecA, vecB, vecC, vecD; - uint32_t blkCnt; - uint32_t n1, n2; - uint32_t stage = 0; - int32_t iter = 1; - static const int32_t strides[4] = { - (0 - 16) * (int32_t)sizeof(q31_t *), (1 - 16) * (int32_t)sizeof(q31_t *), - (8 - 16) * (int32_t)sizeof(q31_t *), (9 - 16) * (int32_t)sizeof(q31_t *) - }; - - /* - * Process first stages - * Each stage in middle stages provides two down scaling of the input - */ - n2 = fftLen; - n1 = n2; - n2 >>= 2u; - - for (int k = fftLen / 4u; k > 1; k >>= 2u) - { - q31_t const *p_rearranged_twiddle_tab_stride2 = - &S->rearranged_twiddle_stride2[ - S->rearranged_twiddle_tab_stride2_arr[stage]]; - q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[ - S->rearranged_twiddle_tab_stride3_arr[stage]]; - q31_t const *p_rearranged_twiddle_tab_stride1 = - &S->rearranged_twiddle_stride1[ - S->rearranged_twiddle_tab_stride1_arr[stage]]; - - q31_t * pBase = pSrc; - for (int i = 0; i < iter; i++) - { - q31_t *inA = pBase; - q31_t *inB = inA + n2 * CMPLX_DIM; - q31_t *inC = inB + n2 * CMPLX_DIM; - q31_t *inD = inC + n2 * CMPLX_DIM; - q31_t const *pW1 = p_rearranged_twiddle_tab_stride1; - q31_t const *pW2 = p_rearranged_twiddle_tab_stride2; - q31_t const *pW3 = p_rearranged_twiddle_tab_stride3; - q31x4_t vecW; - - blkCnt = n2 / 2; - /* - * load 2 x q31 complex pair - */ - vecA = vldrwq_s32(inA); - vecC = vldrwq_s32(inC); - while (blkCnt > 0U) - { - vecB = vldrwq_s32(inB); - vecD = vldrwq_s32(inD); - - vecSum0 = vhaddq(vecA, vecC); - vecDiff0 = vhsubq(vecA, vecC); - - vecSum1 = vhaddq(vecB, vecD); - vecDiff1 = vhsubq(vecB, vecD); - /* - * [ 1 1 1 1 ] * [ A B C D ]' .* 1 - */ - vecTmp0 = vhaddq(vecSum0, vecSum1); - vst1q(inA, vecTmp0); - inA += 4; - /* - * [ 1 -1 1 -1 ] * [ A B C D ]' - */ - vecTmp0 = vhsubq(vecSum0, vecSum1); - /* - * [ 1 -1 1 -1 ] * [ A B C D ]'.* W2 - */ - vecW = vld1q(pW2); - pW2 += 4; - vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t); - - vst1q(inB, vecTmp1); - inB += 4; - /* - * [ 1 -i -1 +i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 -i -1 +i ] * [ A B C D ]'.* W1 - */ - vecW = vld1q(pW1); - pW1 += 4; - vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t); - vst1q(inC, vecTmp1); - inC += 4; - /* - * [ 1 +i -1 -i ] * [ A B C D ]' - */ - vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); - /* - * [ 1 +i -1 -i ] * [ A B C D ]'.* W3 - */ - vecW = vld1q(pW3); - pW3 += 4; - vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t); - vst1q(inD, vecTmp1); - inD += 4; - - vecA = vldrwq_s32(inA); - vecC = vldrwq_s32(inC); - - blkCnt--; - } - pBase += CMPLX_DIM * n1; - } - n1 = n2; - n2 >>= 2u; - iter = iter << 2; - stage++; - } - - /* - * End of 1st stages process - * data is in 11.21(q21) format for the 1024 point as there are 3 middle stages - * data is in 9.23(q23) format for the 256 point as there are 2 middle stages - * data is in 7.25(q25) format for the 64 point as there are 1 middle stage - * data is in 5.27(q27) format for the 16 point as there are no middle stages - */ - - /* - * start of Last stage process - */ - uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides); - vecScGathAddr = vecScGathAddr + (uint32_t) pSrc; - - /* - * load scheduling - */ - vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); - vecC = vldrwq_gather_base_s32(vecScGathAddr, 16); - - blkCnt = (fftLen >> 3); - while (blkCnt > 0U) - { - vecSum0 = vhaddq(vecA, vecC); - vecDiff0 = vhsubq(vecA, vecC); - - vecB = vldrwq_gather_base_s32(vecScGathAddr, 8); - vecD = vldrwq_gather_base_s32(vecScGathAddr, 24); - - vecSum1 = vhaddq(vecB, vecD); - vecDiff1 = vhsubq(vecB, vecD); - /* - * pre-load for next iteration - */ - vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64); - vecC = vldrwq_gather_base_s32(vecScGathAddr, 16); - - vecTmp0 = vhaddq(vecSum0, vecSum1); - vstrwq_scatter_base_s32(vecScGathAddr, -64, vecTmp0); - - vecTmp0 = vhsubq(vecSum0, vecSum1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, vecTmp0); - - vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 16, vecTmp0); - - vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1); - vstrwq_scatter_base_s32(vecScGathAddr, -64 + 24, vecTmp0); - - blkCnt--; - } - /* - * output is in 11.21(q21) format for the 1024 point - * output is in 9.23(q23) format for the 256 point - * output is in 7.25(q25) format for the 64 point - * output is in 5.27(q27) format for the 16 point - */ -} - -static void arm_cfft_radix4by2_inverse_q31_mve(const arm_cfft_instance_q31 *S, q31_t *pSrc, uint32_t fftLen) -{ - uint32_t n2; - q31_t *pIn0; - q31_t *pIn1; - const q31_t *pCoef = S->pTwiddle; - - //uint16_t twidCoefModifier = arm_cfft_radix2_twiddle_factor(S->fftLen); - //q31_t twidIncr = (2 * twidCoefModifier * sizeof(q31_t)); - uint32_t blkCnt; - //uint64x2_t vecOffs; - q31x4_t vecIn0, vecIn1, vecSum, vecDiff; - q31x4_t vecCmplxTmp, vecTw; - - n2 = fftLen >> 1; - - pIn0 = pSrc; - pIn1 = pSrc + fftLen; - //vecOffs[0] = 0; - //vecOffs[1] = (uint64_t) twidIncr; - blkCnt = n2 / 2; - - while (blkCnt > 0U) - { - vecIn0 = vld1q_s32(pIn0); - vecIn1 = vld1q_s32(pIn1); - - vecIn0 = vecIn0 >> 1; - vecIn1 = vecIn1 >> 1; - vecSum = vhaddq(vecIn0, vecIn1); - vst1q(pIn0, vecSum); - pIn0 += 4; - - //vecTw = (q31x4_t) vldrdq_gather_offset_s64(pCoef, vecOffs); - vecTw = vld1q_s32(pCoef); - pCoef += 4; - vecDiff = vhsubq(vecIn0, vecIn1); - - vecCmplxTmp = MVE_CMPLX_MULT_FX_AxB(vecDiff, vecTw, q31x4_t); - vst1q(pIn1, vecCmplxTmp); - pIn1 += 4; - - //vecOffs = vaddq((q31x4_t) vecOffs, 2 * twidIncr); - blkCnt--; - } - - _arm_radix4_butterfly_inverse_q31_mve(S, pSrc, n2); - - _arm_radix4_butterfly_inverse_q31_mve(S, pSrc + fftLen, n2); - - pIn0 = pSrc; - blkCnt = (fftLen << 1) >> 2; - while (blkCnt > 0U) - { - vecIn0 = vld1q_s32(pIn0); - vecIn0 = vecIn0 << 1; - vst1q(pIn0, vecIn0); - pIn0 += 4; - blkCnt--; - } - /* - * tail - * (will be merged thru tail predication) - */ - blkCnt = (fftLen << 1) & 3; - if (blkCnt > 0U) - { - mve_pred16_t p0 = vctp32q(blkCnt); - - vecIn0 = vld1q_s32(pIn0); - vecIn0 = vecIn0 << 1; - vstrwq_p(pIn0, vecIn0, p0); - } - -} - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Processing function for the Q31 complex FFT. - @param[in] S points to an instance of the fixed-point CFFT structure - @param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return none - */ -void arm_cfft_q31( - const arm_cfft_instance_q31 * S, - q31_t * pSrc, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - uint32_t fftLen = S->fftLen; - - if (ifftFlag == 1U) { - - switch (fftLen) { - case 16: - case 64: - case 256: - case 1024: - case 4096: - _arm_radix4_butterfly_inverse_q31_mve(S, pSrc, fftLen); - break; - - case 32: - case 128: - case 512: - case 2048: - arm_cfft_radix4by2_inverse_q31_mve(S, pSrc, fftLen); - break; - } - } else { - switch (fftLen) { - case 16: - case 64: - case 256: - case 1024: - case 4096: - _arm_radix4_butterfly_q31_mve(S, pSrc, fftLen); - break; - - case 32: - case 128: - case 512: - case 2048: - arm_cfft_radix4by2_q31_mve(S, pSrc, fftLen); - break; - } - } - - - if (bitReverseFlag) - { - - arm_bitreversal_32_inpl_mve((uint32_t*)pSrc, S->bitRevLength, S->pBitRevTable); - - } -} -#else - -extern void arm_radix4_butterfly_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef, - uint32_t twidCoefModifier); - -extern void arm_radix4_butterfly_inverse_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef, - uint32_t twidCoefModifier); - -extern void arm_bitreversal_32( - uint32_t * pSrc, - const uint16_t bitRevLen, - const uint16_t * pBitRevTable); - -void arm_cfft_radix4by2_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef); - -void arm_cfft_radix4by2_inverse_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef); - - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Processing function for the Q31 complex FFT. - @param[in] S points to an instance of the fixed-point CFFT structure - @param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return none - */ -void arm_cfft_q31( - const arm_cfft_instance_q31 * S, - q31_t * p1, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - uint32_t L = S->fftLen; - - if (ifftFlag == 1U) - { - switch (L) - { - case 16: - case 64: - case 256: - case 1024: - case 4096: - arm_radix4_butterfly_inverse_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 ); - break; - - case 32: - case 128: - case 512: - case 2048: - arm_cfft_radix4by2_inverse_q31 ( p1, L, S->pTwiddle ); - break; - } - } - else - { - switch (L) - { - case 16: - case 64: - case 256: - case 1024: - case 4096: - arm_radix4_butterfly_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 ); - break; - - case 32: - case 128: - case 512: - case 2048: - arm_cfft_radix4by2_q31 ( p1, L, S->pTwiddle ); - break; - } - } - - if ( bitReverseFlag ) - arm_bitreversal_32 ((uint32_t*) p1, S->bitRevLength, S->pBitRevTable); -} - -/** - @} end of ComplexFFT group - */ - -void arm_cfft_radix4by2_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef) -{ - uint32_t i, l; - uint32_t n2; - q31_t xt, yt, cosVal, sinVal; - q31_t p0, p1; - - n2 = fftLen >> 1U; - for (i = 0; i < n2; i++) - { - cosVal = pCoef[2 * i]; - sinVal = pCoef[2 * i + 1]; - - l = i + n2; - - xt = (pSrc[2 * i] >> 2U) - (pSrc[2 * l] >> 2U); - pSrc[2 * i] = (pSrc[2 * i] >> 2U) + (pSrc[2 * l] >> 2U); - - yt = (pSrc[2 * i + 1] >> 2U) - (pSrc[2 * l + 1] >> 2U); - pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2U) + (pSrc[2 * i + 1] >> 2U); - - mult_32x32_keep32_R(p0, xt, cosVal); - mult_32x32_keep32_R(p1, yt, cosVal); - multAcc_32x32_keep32_R(p0, yt, sinVal); - multSub_32x32_keep32_R(p1, xt, sinVal); - - pSrc[2 * l] = p0 << 1; - pSrc[2 * l + 1] = p1 << 1; - } - - - /* first col */ - arm_radix4_butterfly_q31 (pSrc, n2, (q31_t*)pCoef, 2U); - - /* second col */ - arm_radix4_butterfly_q31 (pSrc + fftLen, n2, (q31_t*)pCoef, 2U); - - n2 = fftLen >> 1U; - for (i = 0; i < n2; i++) - { - p0 = pSrc[4 * i + 0]; - p1 = pSrc[4 * i + 1]; - xt = pSrc[4 * i + 2]; - yt = pSrc[4 * i + 3]; - - p0 <<= 1U; - p1 <<= 1U; - xt <<= 1U; - yt <<= 1U; - - pSrc[4 * i + 0] = p0; - pSrc[4 * i + 1] = p1; - pSrc[4 * i + 2] = xt; - pSrc[4 * i + 3] = yt; - } - -} - -void arm_cfft_radix4by2_inverse_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef) -{ - uint32_t i, l; - uint32_t n2; - q31_t xt, yt, cosVal, sinVal; - q31_t p0, p1; - - n2 = fftLen >> 1U; - for (i = 0; i < n2; i++) - { - cosVal = pCoef[2 * i]; - sinVal = pCoef[2 * i + 1]; - - l = i + n2; - - xt = (pSrc[2 * i] >> 2U) - (pSrc[2 * l] >> 2U); - pSrc[2 * i] = (pSrc[2 * i] >> 2U) + (pSrc[2 * l] >> 2U); - - yt = (pSrc[2 * i + 1] >> 2U) - (pSrc[2 * l + 1] >> 2U); - pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2U) + (pSrc[2 * i + 1] >> 2U); - - mult_32x32_keep32_R(p0, xt, cosVal); - mult_32x32_keep32_R(p1, yt, cosVal); - multSub_32x32_keep32_R(p0, yt, sinVal); - multAcc_32x32_keep32_R(p1, xt, sinVal); - - pSrc[2 * l] = p0 << 1U; - pSrc[2 * l + 1] = p1 << 1U; - } - - /* first col */ - arm_radix4_butterfly_inverse_q31( pSrc, n2, (q31_t*)pCoef, 2U); - - /* second col */ - arm_radix4_butterfly_inverse_q31( pSrc + fftLen, n2, (q31_t*)pCoef, 2U); - - n2 = fftLen >> 1U; - for (i = 0; i < n2; i++) - { - p0 = pSrc[4 * i + 0]; - p1 = pSrc[4 * i + 1]; - xt = pSrc[4 * i + 2]; - yt = pSrc[4 * i + 3]; - - p0 <<= 1U; - p1 <<= 1U; - xt <<= 1U; - yt <<= 1U; - - pSrc[4 * i + 0] = p0; - pSrc[4 * i + 1] = p1; - pSrc[4 * i + 2] = xt; - pSrc[4 * i + 3] = yt; - } -} -#endif /* defined(ARM_MATH_MVEI) */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_q31.c
+ * Description: Combined Radix Decimation in Frequency CFFT fixed point processing function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+extern void arm_radix4_butterfly_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef,
+ uint32_t twidCoefModifier);
+
+extern void arm_radix4_butterfly_inverse_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef,
+ uint32_t twidCoefModifier);
+
+extern void arm_bitreversal_32(
+ uint32_t * pSrc,
+ const uint16_t bitRevLen,
+ const uint16_t * pBitRevTable);
+
+void arm_cfft_radix4by2_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef);
+
+void arm_cfft_radix4by2_inverse_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef);
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Processing function for the Q31 complex FFT.
+ @param[in] S points to an instance of the fixed-point CFFT structure
+ @param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+ @param[in] ifftFlag flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return none
+ */
+
+void arm_cfft_q31(
+ const arm_cfft_instance_q31 * S,
+ q31_t * p1,
+ uint8_t ifftFlag,
+ uint8_t bitReverseFlag)
+{
+ uint32_t L = S->fftLen;
+
+ if (ifftFlag == 1U)
+ {
+ switch (L)
+ {
+ case 16:
+ case 64:
+ case 256:
+ case 1024:
+ case 4096:
+ arm_radix4_butterfly_inverse_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 );
+ break;
+
+ case 32:
+ case 128:
+ case 512:
+ case 2048:
+ arm_cfft_radix4by2_inverse_q31 ( p1, L, S->pTwiddle );
+ break;
+ }
+ }
+ else
+ {
+ switch (L)
+ {
+ case 16:
+ case 64:
+ case 256:
+ case 1024:
+ case 4096:
+ arm_radix4_butterfly_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 );
+ break;
+
+ case 32:
+ case 128:
+ case 512:
+ case 2048:
+ arm_cfft_radix4by2_q31 ( p1, L, S->pTwiddle );
+ break;
+ }
+ }
+
+ if ( bitReverseFlag )
+ arm_bitreversal_32 ((uint32_t*) p1, S->bitRevLength, S->pBitRevTable);
+}
+
+/**
+ @} end of ComplexFFT group
+ */
+
+void arm_cfft_radix4by2_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef)
+{
+ uint32_t i, l;
+ uint32_t n2;
+ q31_t xt, yt, cosVal, sinVal;
+ q31_t p0, p1;
+
+ n2 = fftLen >> 1U;
+ for (i = 0; i < n2; i++)
+ {
+ cosVal = pCoef[2 * i];
+ sinVal = pCoef[2 * i + 1];
+
+ l = i + n2;
+
+ xt = (pSrc[2 * i] >> 2U) - (pSrc[2 * l] >> 2U);
+ pSrc[2 * i] = (pSrc[2 * i] >> 2U) + (pSrc[2 * l] >> 2U);
+
+ yt = (pSrc[2 * i + 1] >> 2U) - (pSrc[2 * l + 1] >> 2U);
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2U) + (pSrc[2 * i + 1] >> 2U);
+
+ mult_32x32_keep32_R(p0, xt, cosVal);
+ mult_32x32_keep32_R(p1, yt, cosVal);
+ multAcc_32x32_keep32_R(p0, yt, sinVal);
+ multSub_32x32_keep32_R(p1, xt, sinVal);
+
+ pSrc[2 * l] = p0 << 1;
+ pSrc[2 * l + 1] = p1 << 1;
+ }
+
+ /* first col */
+ arm_radix4_butterfly_q31 (pSrc, n2, (q31_t*)pCoef, 2U);
+
+ /* second col */
+ arm_radix4_butterfly_q31 (pSrc + fftLen, n2, (q31_t*)pCoef, 2U);
+
+ n2 = fftLen >> 1U;
+ for (i = 0; i < n2; i++)
+ {
+ p0 = pSrc[4 * i + 0];
+ p1 = pSrc[4 * i + 1];
+ xt = pSrc[4 * i + 2];
+ yt = pSrc[4 * i + 3];
+
+ p0 <<= 1U;
+ p1 <<= 1U;
+ xt <<= 1U;
+ yt <<= 1U;
+
+ pSrc[4 * i + 0] = p0;
+ pSrc[4 * i + 1] = p1;
+ pSrc[4 * i + 2] = xt;
+ pSrc[4 * i + 3] = yt;
+ }
+
+}
+
+void arm_cfft_radix4by2_inverse_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef)
+{
+ uint32_t i, l;
+ uint32_t n2;
+ q31_t xt, yt, cosVal, sinVal;
+ q31_t p0, p1;
+
+ n2 = fftLen >> 1U;
+ for (i = 0; i < n2; i++)
+ {
+ cosVal = pCoef[2 * i];
+ sinVal = pCoef[2 * i + 1];
+
+ l = i + n2;
+
+ xt = (pSrc[2 * i] >> 2U) - (pSrc[2 * l] >> 2U);
+ pSrc[2 * i] = (pSrc[2 * i] >> 2U) + (pSrc[2 * l] >> 2U);
+
+ yt = (pSrc[2 * i + 1] >> 2U) - (pSrc[2 * l + 1] >> 2U);
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2U) + (pSrc[2 * i + 1] >> 2U);
+
+ mult_32x32_keep32_R(p0, xt, cosVal);
+ mult_32x32_keep32_R(p1, yt, cosVal);
+ multSub_32x32_keep32_R(p0, yt, sinVal);
+ multAcc_32x32_keep32_R(p1, xt, sinVal);
+
+ pSrc[2 * l] = p0 << 1U;
+ pSrc[2 * l + 1] = p1 << 1U;
+ }
+
+ /* first col */
+ arm_radix4_butterfly_inverse_q31( pSrc, n2, (q31_t*)pCoef, 2U);
+
+ /* second col */
+ arm_radix4_butterfly_inverse_q31( pSrc + fftLen, n2, (q31_t*)pCoef, 2U);
+
+ n2 = fftLen >> 1U;
+ for (i = 0; i < n2; i++)
+ {
+ p0 = pSrc[4 * i + 0];
+ p1 = pSrc[4 * i + 1];
+ xt = pSrc[4 * i + 2];
+ yt = pSrc[4 * i + 3];
+
+ p0 <<= 1U;
+ p1 <<= 1U;
+ xt <<= 1U;
+ yt <<= 1U;
+
+ pSrc[4 * i + 0] = p0;
+ pSrc[4 * i + 1] = p1;
+ pSrc[4 * i + 2] = xt;
+ pSrc[4 * i + 3] = yt;
+ }
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f32.c index ab218a5..c514fda 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_f32.c @@ -1,470 +1,470 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix2_f32.c - * Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -void arm_radix2_butterfly_f32( - float32_t * pSrc, - uint32_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier); - -void arm_radix2_butterfly_inverse_f32( - float32_t * pSrc, - uint32_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier, - float32_t onebyfftLen); - -extern void arm_bitreversal_f32( - float32_t * pSrc, - uint16_t fftSize, - uint16_t bitRevFactor, - const uint16_t * pBitRevTab); - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Radix-2 CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future - @param[in] S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure - @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @return none - */ - -void arm_cfft_radix2_f32( -const arm_cfft_radix2_instance_f32 * S, - float32_t * pSrc) -{ - - if (S->ifftFlag == 1U) - { - /* Complex IFFT radix-2 */ - arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, - S->twidCoefModifier, S->onebyfftLen); - } - else - { - /* Complex FFT radix-2 */ - arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, - S->twidCoefModifier); - } - - if (S->bitReverseFlag == 1U) - { - /* Bit Reversal */ - arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); - } - -} - - -/** - @} end of ComplexFFT group - */ - - - -/* ---------------------------------------------------------------------- - ** Internal helper function used by the FFTs - ** ------------------------------------------------------------------- */ - -/** - brief Core function for the floating-point CFFT butterfly process. - param[in,out] pSrc points to in-place buffer of floating-point data type - param[in] fftLen length of the FFT - param[in] pCoef points to twiddle coefficient buffer - param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table - return none - */ - -void arm_radix2_butterfly_f32( - float32_t * pSrc, - uint32_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier) -{ - - uint32_t i, j, k, l; - uint32_t n1, n2, ia; - float32_t xt, yt, cosVal, sinVal; - float32_t p0, p1, p2, p3; - float32_t a0, a1; - -#if defined (ARM_MATH_DSP) - - /* Initializations for the first stage */ - n2 = fftLen >> 1; - ia = 0; - i = 0; - - // loop for groups - for (k = n2; k > 0; k--) - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - - /* Twiddle coefficients index modifier */ - ia += twidCoefModifier; - - /* index calculation for the input as, */ - /* pSrc[i + 0], pSrc[i + fftLen/1] */ - l = i + n2; - - /* Butterfly implementation */ - a0 = pSrc[2 * i] + pSrc[2 * l]; - xt = pSrc[2 * i] - pSrc[2 * l]; - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1]; - - p0 = xt * cosVal; - p1 = yt * sinVal; - p2 = yt * cosVal; - p3 = xt * sinVal; - - pSrc[2 * i] = a0; - pSrc[2 * i + 1] = a1; - - pSrc[2 * l] = p0 + p1; - pSrc[2 * l + 1] = p2 - p3; - - i++; - } // groups loop end - - twidCoefModifier <<= 1U; - - // loop for stage - for (k = n2; k > 2; k = k >> 1) - { - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - // loop for groups - j = 0; - do - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia += twidCoefModifier; - - // loop for butterfly - i = j; - do - { - l = i + n2; - a0 = pSrc[2 * i] + pSrc[2 * l]; - xt = pSrc[2 * i] - pSrc[2 * l]; - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1]; - - p0 = xt * cosVal; - p1 = yt * sinVal; - p2 = yt * cosVal; - p3 = xt * sinVal; - - pSrc[2 * i] = a0; - pSrc[2 * i + 1] = a1; - - pSrc[2 * l] = p0 + p1; - pSrc[2 * l + 1] = p2 - p3; - - i += n1; - } while ( i < fftLen ); // butterfly loop end - j++; - } while ( j < n2); // groups loop end - twidCoefModifier <<= 1U; - } // stages loop end - - // loop for butterfly - for (i = 0; i < fftLen; i += 2) - { - a0 = pSrc[2 * i] + pSrc[2 * i + 2]; - xt = pSrc[2 * i] - pSrc[2 * i + 2]; - - yt = pSrc[2 * i + 1] - pSrc[2 * i + 3]; - a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1]; - - pSrc[2 * i] = a0; - pSrc[2 * i + 1] = a1; - pSrc[2 * i + 2] = xt; - pSrc[2 * i + 3] = yt; - } // groups loop end - -#else /* #if defined (ARM_MATH_DSP) */ - - n2 = fftLen; - - // loop for stage - for (k = fftLen; k > 1; k = k >> 1) - { - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - // loop for groups - j = 0; - do - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia += twidCoefModifier; - - // loop for butterfly - i = j; - do - { - l = i + n2; - a0 = pSrc[2 * i] + pSrc[2 * l]; - xt = pSrc[2 * i] - pSrc[2 * l]; - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1]; - - p0 = xt * cosVal; - p1 = yt * sinVal; - p2 = yt * cosVal; - p3 = xt * sinVal; - - pSrc[2 * i] = a0; - pSrc[2 * i + 1] = a1; - - pSrc[2 * l] = p0 + p1; - pSrc[2 * l + 1] = p2 - p3; - - i += n1; - } while (i < fftLen); - j++; - } while (j < n2); - twidCoefModifier <<= 1U; - } - -#endif /* #if defined (ARM_MATH_DSP) */ - -} - - -void arm_radix2_butterfly_inverse_f32( - float32_t * pSrc, - uint32_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier, - float32_t onebyfftLen) -{ - - uint32_t i, j, k, l; - uint32_t n1, n2, ia; - float32_t xt, yt, cosVal, sinVal; - float32_t p0, p1, p2, p3; - float32_t a0, a1; - -#if defined (ARM_MATH_DSP) - - n2 = fftLen >> 1; - ia = 0; - - // loop for groups - for (i = 0; i < n2; i++) - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia += twidCoefModifier; - - l = i + n2; - a0 = pSrc[2 * i] + pSrc[2 * l]; - xt = pSrc[2 * i] - pSrc[2 * l]; - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1]; - - p0 = xt * cosVal; - p1 = yt * sinVal; - p2 = yt * cosVal; - p3 = xt * sinVal; - - pSrc[2 * i] = a0; - pSrc[2 * i + 1] = a1; - - pSrc[2 * l] = p0 - p1; - pSrc[2 * l + 1] = p2 + p3; - } // groups loop end - - twidCoefModifier <<= 1U; - - // loop for stage - for (k = fftLen / 2; k > 2; k = k >> 1) - { - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - // loop for groups - j = 0; - do - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia += twidCoefModifier; - - // loop for butterfly - i = j; - do - { - l = i + n2; - a0 = pSrc[2 * i] + pSrc[2 * l]; - xt = pSrc[2 * i] - pSrc[2 * l]; - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1]; - - p0 = xt * cosVal; - p1 = yt * sinVal; - p2 = yt * cosVal; - p3 = xt * sinVal; - - pSrc[2 * i] = a0; - pSrc[2 * i + 1] = a1; - - pSrc[2 * l] = p0 - p1; - pSrc[2 * l + 1] = p2 + p3; - - i += n1; - } while ( i < fftLen ); // butterfly loop end - j++; - } while (j < n2); // groups loop end - - twidCoefModifier <<= 1U; - } // stages loop end - - // loop for butterfly - for (i = 0; i < fftLen; i += 2) - { - a0 = pSrc[2 * i] + pSrc[2 * i + 2]; - xt = pSrc[2 * i] - pSrc[2 * i + 2]; - - a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1]; - yt = pSrc[2 * i + 1] - pSrc[2 * i + 3]; - - p0 = a0 * onebyfftLen; - p2 = xt * onebyfftLen; - p1 = a1 * onebyfftLen; - p3 = yt * onebyfftLen; - - pSrc[2 * i] = p0; - pSrc[2 * i + 1] = p1; - pSrc[2 * i + 2] = p2; - pSrc[2 * i + 3] = p3; - } // butterfly loop end - -#else /* #if defined (ARM_MATH_DSP) */ - - n2 = fftLen; - - // loop for stage - for (k = fftLen; k > 2; k = k >> 1) - { - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - // loop for groups - j = 0; - do - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - // loop for butterfly - i = j; - do - { - l = i + n2; - a0 = pSrc[2 * i] + pSrc[2 * l]; - xt = pSrc[2 * i] - pSrc[2 * l]; - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1]; - - p0 = xt * cosVal; - p1 = yt * sinVal; - p2 = yt * cosVal; - p3 = xt * sinVal; - - pSrc[2 * i] = a0; - pSrc[2 * i + 1] = a1; - - pSrc[2 * l] = p0 - p1; - pSrc[2 * l + 1] = p2 + p3; - - i += n1; - } while ( i < fftLen ); // butterfly loop end - j++; - } while ( j < n2 ); // groups loop end - - twidCoefModifier = twidCoefModifier << 1U; - } // stages loop end - - n1 = n2; - n2 = n2 >> 1; - - // loop for butterfly - for (i = 0; i < fftLen; i += n1) - { - l = i + n2; - - a0 = pSrc[2 * i] + pSrc[2 * l]; - xt = pSrc[2 * i] - pSrc[2 * l]; - - a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1]; - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - - p0 = a0 * onebyfftLen; - p2 = xt * onebyfftLen; - p1 = a1 * onebyfftLen; - p3 = yt * onebyfftLen; - - pSrc[2 * i] = p0; - pSrc[2 * l] = p2; - - pSrc[2 * i + 1] = p1; - pSrc[2 * l + 1] = p3; - } // butterfly loop end - -#endif /* #if defined (ARM_MATH_DSP) */ - -} +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix2_f32.c
+ * Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+void arm_radix2_butterfly_f32(
+ float32_t * pSrc,
+ uint32_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier);
+
+void arm_radix2_butterfly_inverse_f32(
+ float32_t * pSrc,
+ uint32_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier,
+ float32_t onebyfftLen);
+
+extern void arm_bitreversal_f32(
+ float32_t * pSrc,
+ uint16_t fftSize,
+ uint16_t bitRevFactor,
+ const uint16_t * pBitRevTab);
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Radix-2 CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future
+ @param[in] S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure
+ @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+ @return none
+ */
+
+void arm_cfft_radix2_f32(
+const arm_cfft_radix2_instance_f32 * S,
+ float32_t * pSrc)
+{
+
+ if (S->ifftFlag == 1U)
+ {
+ /* Complex IFFT radix-2 */
+ arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
+ S->twidCoefModifier, S->onebyfftLen);
+ }
+ else
+ {
+ /* Complex FFT radix-2 */
+ arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
+ S->twidCoefModifier);
+ }
+
+ if (S->bitReverseFlag == 1U)
+ {
+ /* Bit Reversal */
+ arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+ }
+
+}
+
+
+/**
+ @} end of ComplexFFT group
+ */
+
+
+
+/* ----------------------------------------------------------------------
+ ** Internal helper function used by the FFTs
+ ** ------------------------------------------------------------------- */
+
+/**
+ brief Core function for the floating-point CFFT butterfly process.
+ param[in,out] pSrc points to in-place buffer of floating-point data type
+ param[in] fftLen length of the FFT
+ param[in] pCoef points to twiddle coefficient buffer
+ param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+ return none
+ */
+
+void arm_radix2_butterfly_f32(
+ float32_t * pSrc,
+ uint32_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier)
+{
+
+ uint32_t i, j, k, l;
+ uint32_t n1, n2, ia;
+ float32_t xt, yt, cosVal, sinVal;
+ float32_t p0, p1, p2, p3;
+ float32_t a0, a1;
+
+#if defined (ARM_MATH_DSP)
+
+ /* Initializations for the first stage */
+ n2 = fftLen >> 1;
+ ia = 0;
+ i = 0;
+
+ // loop for groups
+ for (k = n2; k > 0; k--)
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+
+ /* Twiddle coefficients index modifier */
+ ia += twidCoefModifier;
+
+ /* index calculation for the input as, */
+ /* pSrc[i + 0], pSrc[i + fftLen/1] */
+ l = i + n2;
+
+ /* Butterfly implementation */
+ a0 = pSrc[2 * i] + pSrc[2 * l];
+ xt = pSrc[2 * i] - pSrc[2 * l];
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+
+ p0 = xt * cosVal;
+ p1 = yt * sinVal;
+ p2 = yt * cosVal;
+ p3 = xt * sinVal;
+
+ pSrc[2 * i] = a0;
+ pSrc[2 * i + 1] = a1;
+
+ pSrc[2 * l] = p0 + p1;
+ pSrc[2 * l + 1] = p2 - p3;
+
+ i++;
+ } // groups loop end
+
+ twidCoefModifier <<= 1U;
+
+ // loop for stage
+ for (k = n2; k > 2; k = k >> 1)
+ {
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ // loop for groups
+ j = 0;
+ do
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia += twidCoefModifier;
+
+ // loop for butterfly
+ i = j;
+ do
+ {
+ l = i + n2;
+ a0 = pSrc[2 * i] + pSrc[2 * l];
+ xt = pSrc[2 * i] - pSrc[2 * l];
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+
+ p0 = xt * cosVal;
+ p1 = yt * sinVal;
+ p2 = yt * cosVal;
+ p3 = xt * sinVal;
+
+ pSrc[2 * i] = a0;
+ pSrc[2 * i + 1] = a1;
+
+ pSrc[2 * l] = p0 + p1;
+ pSrc[2 * l + 1] = p2 - p3;
+
+ i += n1;
+ } while ( i < fftLen ); // butterfly loop end
+ j++;
+ } while ( j < n2); // groups loop end
+ twidCoefModifier <<= 1U;
+ } // stages loop end
+
+ // loop for butterfly
+ for (i = 0; i < fftLen; i += 2)
+ {
+ a0 = pSrc[2 * i] + pSrc[2 * i + 2];
+ xt = pSrc[2 * i] - pSrc[2 * i + 2];
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
+ a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
+
+ pSrc[2 * i] = a0;
+ pSrc[2 * i + 1] = a1;
+ pSrc[2 * i + 2] = xt;
+ pSrc[2 * i + 3] = yt;
+ } // groups loop end
+
+#else /* #if defined (ARM_MATH_DSP) */
+
+ n2 = fftLen;
+
+ // loop for stage
+ for (k = fftLen; k > 1; k = k >> 1)
+ {
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ // loop for groups
+ j = 0;
+ do
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia += twidCoefModifier;
+
+ // loop for butterfly
+ i = j;
+ do
+ {
+ l = i + n2;
+ a0 = pSrc[2 * i] + pSrc[2 * l];
+ xt = pSrc[2 * i] - pSrc[2 * l];
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+
+ p0 = xt * cosVal;
+ p1 = yt * sinVal;
+ p2 = yt * cosVal;
+ p3 = xt * sinVal;
+
+ pSrc[2 * i] = a0;
+ pSrc[2 * i + 1] = a1;
+
+ pSrc[2 * l] = p0 + p1;
+ pSrc[2 * l + 1] = p2 - p3;
+
+ i += n1;
+ } while (i < fftLen);
+ j++;
+ } while (j < n2);
+ twidCoefModifier <<= 1U;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+
+void arm_radix2_butterfly_inverse_f32(
+ float32_t * pSrc,
+ uint32_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier,
+ float32_t onebyfftLen)
+{
+
+ uint32_t i, j, k, l;
+ uint32_t n1, n2, ia;
+ float32_t xt, yt, cosVal, sinVal;
+ float32_t p0, p1, p2, p3;
+ float32_t a0, a1;
+
+#if defined (ARM_MATH_DSP)
+
+ n2 = fftLen >> 1;
+ ia = 0;
+
+ // loop for groups
+ for (i = 0; i < n2; i++)
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia += twidCoefModifier;
+
+ l = i + n2;
+ a0 = pSrc[2 * i] + pSrc[2 * l];
+ xt = pSrc[2 * i] - pSrc[2 * l];
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+
+ p0 = xt * cosVal;
+ p1 = yt * sinVal;
+ p2 = yt * cosVal;
+ p3 = xt * sinVal;
+
+ pSrc[2 * i] = a0;
+ pSrc[2 * i + 1] = a1;
+
+ pSrc[2 * l] = p0 - p1;
+ pSrc[2 * l + 1] = p2 + p3;
+ } // groups loop end
+
+ twidCoefModifier <<= 1U;
+
+ // loop for stage
+ for (k = fftLen / 2; k > 2; k = k >> 1)
+ {
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ // loop for groups
+ j = 0;
+ do
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia += twidCoefModifier;
+
+ // loop for butterfly
+ i = j;
+ do
+ {
+ l = i + n2;
+ a0 = pSrc[2 * i] + pSrc[2 * l];
+ xt = pSrc[2 * i] - pSrc[2 * l];
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+
+ p0 = xt * cosVal;
+ p1 = yt * sinVal;
+ p2 = yt * cosVal;
+ p3 = xt * sinVal;
+
+ pSrc[2 * i] = a0;
+ pSrc[2 * i + 1] = a1;
+
+ pSrc[2 * l] = p0 - p1;
+ pSrc[2 * l + 1] = p2 + p3;
+
+ i += n1;
+ } while ( i < fftLen ); // butterfly loop end
+ j++;
+ } while (j < n2); // groups loop end
+
+ twidCoefModifier <<= 1U;
+ } // stages loop end
+
+ // loop for butterfly
+ for (i = 0; i < fftLen; i += 2)
+ {
+ a0 = pSrc[2 * i] + pSrc[2 * i + 2];
+ xt = pSrc[2 * i] - pSrc[2 * i + 2];
+
+ a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
+ yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
+
+ p0 = a0 * onebyfftLen;
+ p2 = xt * onebyfftLen;
+ p1 = a1 * onebyfftLen;
+ p3 = yt * onebyfftLen;
+
+ pSrc[2 * i] = p0;
+ pSrc[2 * i + 1] = p1;
+ pSrc[2 * i + 2] = p2;
+ pSrc[2 * i + 3] = p3;
+ } // butterfly loop end
+
+#else /* #if defined (ARM_MATH_DSP) */
+
+ n2 = fftLen;
+
+ // loop for stage
+ for (k = fftLen; k > 2; k = k >> 1)
+ {
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ // loop for groups
+ j = 0;
+ do
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ // loop for butterfly
+ i = j;
+ do
+ {
+ l = i + n2;
+ a0 = pSrc[2 * i] + pSrc[2 * l];
+ xt = pSrc[2 * i] - pSrc[2 * l];
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+
+ p0 = xt * cosVal;
+ p1 = yt * sinVal;
+ p2 = yt * cosVal;
+ p3 = xt * sinVal;
+
+ pSrc[2 * i] = a0;
+ pSrc[2 * i + 1] = a1;
+
+ pSrc[2 * l] = p0 - p1;
+ pSrc[2 * l + 1] = p2 + p3;
+
+ i += n1;
+ } while ( i < fftLen ); // butterfly loop end
+ j++;
+ } while ( j < n2 ); // groups loop end
+
+ twidCoefModifier = twidCoefModifier << 1U;
+ } // stages loop end
+
+ n1 = n2;
+ n2 = n2 >> 1;
+
+ // loop for butterfly
+ for (i = 0; i < fftLen; i += n1)
+ {
+ l = i + n2;
+
+ a0 = pSrc[2 * i] + pSrc[2 * l];
+ xt = pSrc[2 * i] - pSrc[2 * l];
+
+ a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+
+ p0 = a0 * onebyfftLen;
+ p2 = xt * onebyfftLen;
+ p1 = a1 * onebyfftLen;
+ p3 = yt * onebyfftLen;
+
+ pSrc[2 * i] = p0;
+ pSrc[2 * l] = p2;
+
+ pSrc[2 * i + 1] = p1;
+ pSrc[2 * l + 1] = p3;
+ } // butterfly loop end
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f32.c index ae9f29a..4a11840 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f32.c @@ -1,209 +1,197 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix2_init_f32.c - * Description: Radix-2 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Initialization function for the floating-point CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future. - @param[in,out] S points to an instance of the floating-point CFFT/CIFFT structure - @param[in] fftLen length of the FFT - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length - - @par Details - The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed. - Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated - @par - The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order. - Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order. - @par - The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024. - @par - This Function also initializes Twiddle factor table pointer and Bit reversal table pointer. -*/ - -arm_status arm_cfft_radix2_init_f32( - arm_cfft_radix2_instance_f32 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - /* Initialise the default arm status */ - arm_status status = ARM_MATH_ARGUMENT_ERROR; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096) - - /* Initialise the default arm status */ - status = ARM_MATH_SUCCESS; - - /* Initialise the FFT length */ - S->fftLen = fftLen; - - /* Initialise the Twiddle coefficient pointer */ - S->pTwiddle = (float32_t *) twiddleCoef; - - /* Initialise the Flag for selection of CFFT or CIFFT */ - S->ifftFlag = ifftFlag; - - /* Initialise the Flag for calculation Bit reversal or not */ - S->bitReverseFlag = bitReverseFlag; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096) - - /* Initializations of structure parameters depending on the FFT length */ - switch (S->fftLen) - { - - case 4096U: - /* Initializations of structure parameters for 4096 point FFT */ - - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 1U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 1U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) armBitRevTable; - /* Initialise the 1/fftLen Value */ - S->onebyfftLen = 0.000244140625; - break; - - case 2048U: - /* Initializations of structure parameters for 2048 point FFT */ - - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 2U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 2U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) & armBitRevTable[1]; - /* Initialise the 1/fftLen Value */ - S->onebyfftLen = 0.00048828125; - break; - - case 1024U: - /* Initializations of structure parameters for 1024 point FFT */ - - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 4U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 4U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) & armBitRevTable[3]; - /* Initialise the 1/fftLen Value */ - S->onebyfftLen = 0.0009765625f; - break; - - case 512U: - /* Initializations of structure parameters for 512 point FFT */ - - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 8U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 8U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) & armBitRevTable[7]; - /* Initialise the 1/fftLen Value */ - S->onebyfftLen = 0.001953125; - break; - - case 256U: - /* Initializations of structure parameters for 256 point FFT */ - S->twidCoefModifier = 16U; - S->bitRevFactor = 16U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[15]; - S->onebyfftLen = 0.00390625f; - break; - - case 128U: - /* Initializations of structure parameters for 128 point FFT */ - S->twidCoefModifier = 32U; - S->bitRevFactor = 32U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[31]; - S->onebyfftLen = 0.0078125; - break; - - case 64U: - /* Initializations of structure parameters for 64 point FFT */ - S->twidCoefModifier = 64U; - S->bitRevFactor = 64U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[63]; - S->onebyfftLen = 0.015625f; - break; - - case 32U: - /* Initializations of structure parameters for 64 point FFT */ - S->twidCoefModifier = 128U; - S->bitRevFactor = 128U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[127]; - S->onebyfftLen = 0.03125; - break; - - case 16U: - /* Initializations of structure parameters for 16 point FFT */ - S->twidCoefModifier = 256U; - S->bitRevFactor = 256U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[255]; - S->onebyfftLen = 0.0625f; - break; - - - default: - /* Reporting argument error if fftSize is not valid value */ - status = ARM_MATH_ARGUMENT_ERROR; - break; - } - -#endif -#endif -#endif - return (status); -} - -/** - @} end of ComplexFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix2_init_f32.c
+ * Description: Radix-2 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Initialization function for the floating-point CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
+ @param[in,out] S points to an instance of the floating-point CFFT/CIFFT structure
+ @param[in] fftLen length of the FFT
+ @param[in] ifftFlag flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+ @par Details
+ The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+ Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
+ @par
+ The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+ Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+ @par
+ The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
+ @par
+ This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+*/
+
+arm_status arm_cfft_radix2_init_f32(
+ arm_cfft_radix2_instance_f32 * S,
+ uint16_t fftLen,
+ uint8_t ifftFlag,
+ uint8_t bitReverseFlag)
+{
+ /* Initialise the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+
+ /* Initialise the FFT length */
+ S->fftLen = fftLen;
+
+ /* Initialise the Twiddle coefficient pointer */
+ S->pTwiddle = (float32_t *) twiddleCoef;
+
+ /* Initialise the Flag for selection of CFFT or CIFFT */
+ S->ifftFlag = ifftFlag;
+
+ /* Initialise the Flag for calculation Bit reversal or not */
+ S->bitReverseFlag = bitReverseFlag;
+
+ /* Initializations of structure parameters depending on the FFT length */
+ switch (S->fftLen)
+ {
+
+ case 4096U:
+ /* Initializations of structure parameters for 4096 point FFT */
+
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 1U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 1U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) armBitRevTable;
+ /* Initialise the 1/fftLen Value */
+ S->onebyfftLen = 0.000244140625;
+ break;
+
+ case 2048U:
+ /* Initializations of structure parameters for 2048 point FFT */
+
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 2U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 2U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
+ /* Initialise the 1/fftLen Value */
+ S->onebyfftLen = 0.00048828125;
+ break;
+
+ case 1024U:
+ /* Initializations of structure parameters for 1024 point FFT */
+
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 4U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 4U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+ /* Initialise the 1/fftLen Value */
+ S->onebyfftLen = 0.0009765625f;
+ break;
+
+ case 512U:
+ /* Initializations of structure parameters for 512 point FFT */
+
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 8U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 8U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
+ /* Initialise the 1/fftLen Value */
+ S->onebyfftLen = 0.001953125;
+ break;
+
+ case 256U:
+ /* Initializations of structure parameters for 256 point FFT */
+ S->twidCoefModifier = 16U;
+ S->bitRevFactor = 16U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+ S->onebyfftLen = 0.00390625f;
+ break;
+
+ case 128U:
+ /* Initializations of structure parameters for 128 point FFT */
+ S->twidCoefModifier = 32U;
+ S->bitRevFactor = 32U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
+ S->onebyfftLen = 0.0078125;
+ break;
+
+ case 64U:
+ /* Initializations of structure parameters for 64 point FFT */
+ S->twidCoefModifier = 64U;
+ S->bitRevFactor = 64U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+ S->onebyfftLen = 0.015625f;
+ break;
+
+ case 32U:
+ /* Initializations of structure parameters for 64 point FFT */
+ S->twidCoefModifier = 128U;
+ S->bitRevFactor = 128U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
+ S->onebyfftLen = 0.03125;
+ break;
+
+ case 16U:
+ /* Initializations of structure parameters for 16 point FFT */
+ S->twidCoefModifier = 256U;
+ S->bitRevFactor = 256U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+ S->onebyfftLen = 0.0625f;
+ break;
+
+
+ default:
+ /* Reporting argument error if fftSize is not valid value */
+ status = ARM_MATH_ARGUMENT_ERROR;
+ break;
+ }
+
+ return (status);
+}
+
+/**
+ @} end of ComplexFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q15.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q15.c index 68c9930..a31b569 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q15.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q15.c @@ -1,194 +1,182 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix2_init_q15.c - * Description: Radix-2 Decimation in Frequency Q15 FFT & IFFT initialization function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -/** - @ingroup groupTransforms - */ - - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Initialization function for the Q15 CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed - @param[in,out] S points to an instance of the Q15 CFFT/CIFFT structure. - @param[in] fftLen length of the FFT. - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length - - @par Details - The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed. - Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated - @par - The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order. - Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order. - @par - The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024. - @par - This Function also initializes Twiddle factor table pointer and Bit reversal table pointer. -*/ - -arm_status arm_cfft_radix2_init_q15( - arm_cfft_radix2_instance_q15 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - /* Initialise the default arm status */ - arm_status status = ARM_MATH_ARGUMENT_ERROR; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) - - /* Initialise the default arm status */ - status = ARM_MATH_SUCCESS; - - /* Initialise the FFT length */ - S->fftLen = fftLen; - - /* Initialise the Twiddle coefficient pointer */ - S->pTwiddle = (q15_t *) twiddleCoef_4096_q15; - /* Initialise the Flag for selection of CFFT or CIFFT */ - S->ifftFlag = ifftFlag; - /* Initialise the Flag for calculation Bit reversal or not */ - S->bitReverseFlag = bitReverseFlag; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024) - - /* Initializations of structure parameters depending on the FFT length */ - switch (S->fftLen) - { - case 4096U: - /* Initializations of structure parameters for 4096 point FFT */ - - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 1U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 1U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) armBitRevTable; - - break; - - case 2048U: - /* Initializations of structure parameters for 2048 point FFT */ - - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 2U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 2U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) & armBitRevTable[1]; - - break; - - case 1024U: - /* Initializations of structure parameters for 1024 point FFT */ - S->twidCoefModifier = 4U; - S->bitRevFactor = 4U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[3]; - - break; - - case 512U: - /* Initializations of structure parameters for 512 point FFT */ - S->twidCoefModifier = 8U; - S->bitRevFactor = 8U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[7]; - - break; - - case 256U: - /* Initializations of structure parameters for 256 point FFT */ - S->twidCoefModifier = 16U; - S->bitRevFactor = 16U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[15]; - - break; - - case 128U: - /* Initializations of structure parameters for 128 point FFT */ - S->twidCoefModifier = 32U; - S->bitRevFactor = 32U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[31]; - - break; - - case 64U: - /* Initializations of structure parameters for 64 point FFT */ - S->twidCoefModifier = 64U; - S->bitRevFactor = 64U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[63]; - - break; - - case 32U: - /* Initializations of structure parameters for 32 point FFT */ - S->twidCoefModifier = 128U; - S->bitRevFactor = 128U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[127]; - - break; - - case 16U: - /* Initializations of structure parameters for 16 point FFT */ - S->twidCoefModifier = 256U; - S->bitRevFactor = 256U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[255]; - - break; - - default: - /* Reporting argument error if fftSize is not valid value */ - status = ARM_MATH_ARGUMENT_ERROR; - break; - } - -#endif -#endif -#endif - return (status); -} - -/** - @} end of ComplexFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix2_init_q15.c
+ * Description: Radix-2 Decimation in Frequency Q15 FFT & IFFT initialization function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @ingroup groupTransforms
+ */
+
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Initialization function for the Q15 CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed
+ @param[in,out] S points to an instance of the Q15 CFFT/CIFFT structure.
+ @param[in] fftLen length of the FFT.
+ @param[in] ifftFlag flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+ @par Details
+ The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+ Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
+ @par
+ The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+ Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+ @par
+ The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
+ @par
+ This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+*/
+
+arm_status arm_cfft_radix2_init_q15(
+ arm_cfft_radix2_instance_q15 * S,
+ uint16_t fftLen,
+ uint8_t ifftFlag,
+ uint8_t bitReverseFlag)
+{
+ /* Initialise the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+
+ /* Initialise the FFT length */
+ S->fftLen = fftLen;
+
+ /* Initialise the Twiddle coefficient pointer */
+ S->pTwiddle = (q15_t *) twiddleCoef_4096_q15;
+ /* Initialise the Flag for selection of CFFT or CIFFT */
+ S->ifftFlag = ifftFlag;
+ /* Initialise the Flag for calculation Bit reversal or not */
+ S->bitReverseFlag = bitReverseFlag;
+
+ /* Initializations of structure parameters depending on the FFT length */
+ switch (S->fftLen)
+ {
+ case 4096U:
+ /* Initializations of structure parameters for 4096 point FFT */
+
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 1U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 1U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) armBitRevTable;
+
+ break;
+
+ case 2048U:
+ /* Initializations of structure parameters for 2048 point FFT */
+
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 2U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 2U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
+
+ break;
+
+ case 1024U:
+ /* Initializations of structure parameters for 1024 point FFT */
+ S->twidCoefModifier = 4U;
+ S->bitRevFactor = 4U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+
+ break;
+
+ case 512U:
+ /* Initializations of structure parameters for 512 point FFT */
+ S->twidCoefModifier = 8U;
+ S->bitRevFactor = 8U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
+
+ break;
+
+ case 256U:
+ /* Initializations of structure parameters for 256 point FFT */
+ S->twidCoefModifier = 16U;
+ S->bitRevFactor = 16U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+
+ break;
+
+ case 128U:
+ /* Initializations of structure parameters for 128 point FFT */
+ S->twidCoefModifier = 32U;
+ S->bitRevFactor = 32U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
+
+ break;
+
+ case 64U:
+ /* Initializations of structure parameters for 64 point FFT */
+ S->twidCoefModifier = 64U;
+ S->bitRevFactor = 64U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+
+ break;
+
+ case 32U:
+ /* Initializations of structure parameters for 32 point FFT */
+ S->twidCoefModifier = 128U;
+ S->bitRevFactor = 128U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
+
+ break;
+
+ case 16U:
+ /* Initializations of structure parameters for 16 point FFT */
+ S->twidCoefModifier = 256U;
+ S->bitRevFactor = 256U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+
+ break;
+
+ default:
+ /* Reporting argument error if fftSize is not valid value */
+ status = ARM_MATH_ARGUMENT_ERROR;
+ break;
+ }
+
+ return (status);
+}
+
+/**
+ @} end of ComplexFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q31.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q31.c index 73b8a39..2cf4fd5 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q31.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q31.c @@ -1,191 +1,179 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix2_init_q31.c - * Description: Radix-2 Decimation in Frequency Fixed-point CFFT & CIFFT Initialization function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Initialization function for the Q31 CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future. - @param[in,out] S points to an instance of the Q31 CFFT/CIFFT structure - @param[in] fftLen length of the FFT - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length - - @par Details - The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed. - Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated - @par - The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order. - Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order. - @par - The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024. - @par - This Function also initializes Twiddle factor table pointer and Bit reversal table pointer. -*/ - -arm_status arm_cfft_radix2_init_q31( - arm_cfft_radix2_instance_q31 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - /* Initialise the default arm status */ - arm_status status = ARM_MATH_ARGUMENT_ERROR; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) - - /* Initialise the default arm status */ - status = ARM_MATH_SUCCESS; - - /* Initialise the FFT length */ - S->fftLen = fftLen; - - /* Initialise the Twiddle coefficient pointer */ - S->pTwiddle = (q31_t *) twiddleCoef_4096_q31; - - /* Initialise the Flag for selection of CFFT or CIFFT */ - S->ifftFlag = ifftFlag; - - /* Initialise the Flag for calculation Bit reversal or not */ - S->bitReverseFlag = bitReverseFlag; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024) - - /* Initializations of Instance structure depending on the FFT length */ - switch (S->fftLen) - { - /* Initializations of structure parameters for 4096 point FFT */ - case 4096U: - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 1U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 1U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) armBitRevTable; - break; - - /* Initializations of structure parameters for 2048 point FFT */ - case 2048U: - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 2U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 2U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) & armBitRevTable[1]; - break; - - /* Initializations of structure parameters for 1024 point FFT */ - case 1024U: - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 4U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 4U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) & armBitRevTable[3]; - break; - - /* Initializations of structure parameters for 512 point FFT */ - case 512U: - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 8U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 8U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) & armBitRevTable[7]; - break; - - case 256U: - /* Initializations of structure parameters for 256 point FFT */ - S->twidCoefModifier = 16U; - S->bitRevFactor = 16U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[15]; - break; - - case 128U: - /* Initializations of structure parameters for 128 point FFT */ - S->twidCoefModifier = 32U; - S->bitRevFactor = 32U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[31]; - break; - - case 64U: - /* Initializations of structure parameters for 64 point FFT */ - S->twidCoefModifier = 64U; - S->bitRevFactor = 64U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[63]; - break; - - case 32U: - /* Initializations of structure parameters for 32 point FFT */ - S->twidCoefModifier = 128U; - S->bitRevFactor = 128U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[127]; - break; - - case 16U: - /* Initializations of structure parameters for 16 point FFT */ - S->twidCoefModifier = 256U; - S->bitRevFactor = 256U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[255]; - break; - - - default: - /* Reporting argument error if fftSize is not valid value */ - status = ARM_MATH_ARGUMENT_ERROR; - break; - } - -#endif -#endif -#endif - return (status); -} - -/** - @} end of ComplexFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix2_init_q31.c
+ * Description: Radix-2 Decimation in Frequency Fixed-point CFFT & CIFFT Initialization function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Initialization function for the Q31 CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
+ @param[in,out] S points to an instance of the Q31 CFFT/CIFFT structure
+ @param[in] fftLen length of the FFT
+ @param[in] ifftFlag flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+ @par Details
+ The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+ Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
+ @par
+ The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+ Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+ @par
+ The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
+ @par
+ This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+*/
+
+arm_status arm_cfft_radix2_init_q31(
+ arm_cfft_radix2_instance_q31 * S,
+ uint16_t fftLen,
+ uint8_t ifftFlag,
+ uint8_t bitReverseFlag)
+{
+ /* Initialise the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+
+ /* Initialise the FFT length */
+ S->fftLen = fftLen;
+
+ /* Initialise the Twiddle coefficient pointer */
+ S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
+
+ /* Initialise the Flag for selection of CFFT or CIFFT */
+ S->ifftFlag = ifftFlag;
+
+ /* Initialise the Flag for calculation Bit reversal or not */
+ S->bitReverseFlag = bitReverseFlag;
+
+ /* Initializations of Instance structure depending on the FFT length */
+ switch (S->fftLen)
+ {
+ /* Initializations of structure parameters for 4096 point FFT */
+ case 4096U:
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 1U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 1U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) armBitRevTable;
+ break;
+
+ /* Initializations of structure parameters for 2048 point FFT */
+ case 2048U:
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 2U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 2U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
+ break;
+
+ /* Initializations of structure parameters for 1024 point FFT */
+ case 1024U:
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 4U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 4U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+ break;
+
+ /* Initializations of structure parameters for 512 point FFT */
+ case 512U:
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 8U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 8U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
+ break;
+
+ case 256U:
+ /* Initializations of structure parameters for 256 point FFT */
+ S->twidCoefModifier = 16U;
+ S->bitRevFactor = 16U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+ break;
+
+ case 128U:
+ /* Initializations of structure parameters for 128 point FFT */
+ S->twidCoefModifier = 32U;
+ S->bitRevFactor = 32U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
+ break;
+
+ case 64U:
+ /* Initializations of structure parameters for 64 point FFT */
+ S->twidCoefModifier = 64U;
+ S->bitRevFactor = 64U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+ break;
+
+ case 32U:
+ /* Initializations of structure parameters for 32 point FFT */
+ S->twidCoefModifier = 128U;
+ S->bitRevFactor = 128U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
+ break;
+
+ case 16U:
+ /* Initializations of structure parameters for 16 point FFT */
+ S->twidCoefModifier = 256U;
+ S->bitRevFactor = 256U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+ break;
+
+
+ default:
+ /* Reporting argument error if fftSize is not valid value */
+ status = ARM_MATH_ARGUMENT_ERROR;
+ break;
+ }
+
+ return (status);
+}
+
+/**
+ @} end of ComplexFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q15.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q15.c index ca15ea1..fbb809e 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q15.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q15.c @@ -1,689 +1,689 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix2_q15.c - * Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -void arm_radix2_butterfly_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pCoef, - uint16_t twidCoefModifier); - -void arm_radix2_butterfly_inverse_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pCoef, - uint16_t twidCoefModifier); - -void arm_bitreversal_q15( - q15_t * pSrc, - uint32_t fftLen, - uint16_t bitRevFactor, - const uint16_t * pBitRevTab); - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Processing function for the fixed-point CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future. - @param[in] S points to an instance of the fixed-point CFFT/CIFFT structure - @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @return none - */ - -void arm_cfft_radix2_q15( - const arm_cfft_radix2_instance_q15 * S, - q15_t * pSrc) -{ - - if (S->ifftFlag == 1U) - { - arm_radix2_butterfly_inverse_q15 (pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); - } - else - { - arm_radix2_butterfly_q15 (pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); - } - - arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); -} - -/** - @} end of ComplexFFT group - */ - -void arm_radix2_butterfly_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pCoef, - uint16_t twidCoefModifier) -{ -#if defined (ARM_MATH_DSP) - - uint32_t i, j, k, l; - uint32_t n1, n2, ia; - q15_t in; - q31_t T, S, R; - q31_t coeff, out1, out2; - - //N = fftLen; - n2 = fftLen; - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - // loop for groups - for (i = 0; i < n2; i++) - { - coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U)); - - ia = ia + twidCoefModifier; - - l = i + n2; - - T = read_q15x2 (pSrc + (2 * i)); - in = ((int16_t) (T & 0xFFFF)) >> 1; - T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF); - - S = read_q15x2 (pSrc + (2 * l)); - in = ((int16_t) (S & 0xFFFF)) >> 1; - S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __SHADD16(T, S)); - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUAD(coeff, R) >> 16; - out2 = __SMUSDX(coeff, R); -#else - out1 = __SMUSDX(R, coeff) >> 16U; - out2 = __SMUAD(coeff, R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF)); - - coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U)); - - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - i++; - l++; - - T = read_q15x2 (pSrc + (2 * i)); - in = ((int16_t) (T & 0xFFFF)) >> 1; - T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF); - - S = read_q15x2 (pSrc + (2 * l)); - in = ((int16_t) (S & 0xFFFF)) >> 1; - S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __SHADD16(T, S)); - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUAD(coeff, R) >> 16; - out2 = __SMUSDX(coeff, R); -#else - - out1 = __SMUSDX(R, coeff) >> 16U; - out2 = __SMUAD(coeff, R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF)); - - } /* groups loop end */ - - twidCoefModifier = twidCoefModifier << 1U; - - /* loop for stage */ - for (k = fftLen / 2; k > 2; k = k >> 1) - { - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - /* loop for groups */ - for (j = 0; j < n2; j++) - { - coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U)); - - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - for (i = j; i < fftLen; i += n1) - { - l = i + n2; - - T = read_q15x2 (pSrc + (2 * i)); - - S = read_q15x2 (pSrc + (2 * l)); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __SHADD16(T, S)); - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUAD(coeff, R) >> 16; - out2 = __SMUSDX(coeff, R); -#else - out1 = __SMUSDX(R, coeff) >> 16U; - out2 = __SMUAD(coeff, R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF)); - - i += n1; - - l = i + n2; - - T = read_q15x2 (pSrc + (2 * i)); - - S = read_q15x2 (pSrc + (2 * l)); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __SHADD16(T, S)); - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUAD(coeff, R) >> 16; - out2 = __SMUSDX(coeff, R); -#else - out1 = __SMUSDX(R, coeff) >> 16U; - out2 = __SMUAD(coeff, R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF)); - - } /* butterfly loop end */ - - } /* groups loop end */ - - twidCoefModifier = twidCoefModifier << 1U; - } /* stages loop end */ - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U)); - - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - for (i = 0; i < fftLen; i += n1) - { - l = i + n2; - - T = read_q15x2 (pSrc + (2 * i)); - - S = read_q15x2 (pSrc + (2 * l)); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __QADD16(T, S)); - - write_q15x2 (pSrc + (2 * l), R); - - i += n1; - l = i + n2; - - T = read_q15x2 (pSrc + (2 * i)); - - S = read_q15x2 (pSrc + (2 * l)); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __QADD16(T, S)); - - write_q15x2 (pSrc + (2 * l), R); - - } /* groups loop end */ - - -#else /* #if defined (ARM_MATH_DSP) */ - - uint32_t i, j, k, l; - uint32_t n1, n2, ia; - q15_t xt, yt, cosVal, sinVal; - - - // N = fftLen; - n2 = fftLen; - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - /* loop for groups */ - for (j = 0; j < n2; j++) - { - cosVal = pCoef[(ia * 2)]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - for (i = j; i < fftLen; i += n1) - { - l = i + n2; - xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U); - pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U; - - yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U); - pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) + - (pSrc[2 * i + 1] >> 1U) ) >> 1U; - - pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) + - ((int16_t) (((q31_t) yt * sinVal) >> 16))); - - pSrc[2U * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) - - ((int16_t) (((q31_t) xt * sinVal) >> 16))); - - } /* butterfly loop end */ - - } /* groups loop end */ - - twidCoefModifier = twidCoefModifier << 1U; - - /* loop for stage */ - for (k = fftLen / 2; k > 2; k = k >> 1) - { - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - /* loop for groups */ - for (j = 0; j < n2; j++) - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - for (i = j; i < fftLen; i += n1) - { - l = i + n2; - xt = pSrc[2 * i] - pSrc[2 * l]; - pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U; - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U; - - pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) + - ((int16_t) (((q31_t) yt * sinVal) >> 16))); - - pSrc[2U * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) - - ((int16_t) (((q31_t) xt * sinVal) >> 16))); - - } /* butterfly loop end */ - - } /* groups loop end */ - - twidCoefModifier = twidCoefModifier << 1U; - } /* stages loop end */ - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - /* loop for groups */ - for (j = 0; j < n2; j++) - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - for (i = j; i < fftLen; i += n1) - { - l = i + n2; - xt = pSrc[2 * i] - pSrc[2 * l]; - pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]); - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]); - - pSrc[2 * l] = xt; - - pSrc[2 * l + 1] = yt; - - } /* butterfly loop end */ - - } /* groups loop end */ - - twidCoefModifier = twidCoefModifier << 1U; - -#endif /* #if defined (ARM_MATH_DSP) */ - -} - - -void arm_radix2_butterfly_inverse_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pCoef, - uint16_t twidCoefModifier) -{ -#if defined (ARM_MATH_DSP) - - uint32_t i, j, k, l; - uint32_t n1, n2, ia; - q15_t in; - q31_t T, S, R; - q31_t coeff, out1, out2; - - // N = fftLen; - n2 = fftLen; - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - /* loop for groups */ - for (i = 0; i < n2; i++) - { - coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U)); - - ia = ia + twidCoefModifier; - - l = i + n2; - - T = read_q15x2 (pSrc + (2 * i)); - in = ((int16_t) (T & 0xFFFF)) >> 1; - T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF); - - S = read_q15x2 (pSrc + (2 * l)); - in = ((int16_t) (S & 0xFFFF)) >> 1; - S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __SHADD16(T, S)); - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUSD(coeff, R) >> 16; - out2 = __SMUADX(coeff, R); -#else - out1 = __SMUADX(R, coeff) >> 16U; - out2 = __SMUSD(__QSUB(0, coeff), R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF)); - - coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U)); - - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - i++; - l++; - - T = read_q15x2 (pSrc + (2 * i)); - in = ((int16_t) (T & 0xFFFF)) >> 1; - T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF); - - S = read_q15x2 (pSrc + (2 * l)); - in = ((int16_t) (S & 0xFFFF)) >> 1; - S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __SHADD16(T, S)); - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUSD(coeff, R) >> 16; - out2 = __SMUADX(coeff, R); -#else - out1 = __SMUADX(R, coeff) >> 16U; - out2 = __SMUSD(__QSUB(0, coeff), R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF)); - - } /* groups loop end */ - - twidCoefModifier = twidCoefModifier << 1U; - - /* loop for stage */ - for (k = fftLen / 2; k > 2; k = k >> 1) - { - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - /* loop for groups */ - for (j = 0; j < n2; j++) - { - coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U)); - - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - for (i = j; i < fftLen; i += n1) - { - l = i + n2; - - T = read_q15x2 (pSrc + (2 * i)); - - S = read_q15x2 (pSrc + (2 * l)); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __SHADD16(T, S)); - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUSD(coeff, R) >> 16; - out2 = __SMUADX(coeff, R); -#else - out1 = __SMUADX(R, coeff) >> 16U; - out2 = __SMUSD(__QSUB(0, coeff), R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF)); - - i += n1; - - l = i + n2; - - T = read_q15x2 (pSrc + (2 * i)); - - S = read_q15x2 (pSrc + (2 * l)); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __SHADD16(T, S)); - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUSD(coeff, R) >> 16; - out2 = __SMUADX(coeff, R); -#else - out1 = __SMUADX(R, coeff) >> 16U; - out2 = __SMUSD(__QSUB(0, coeff), R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF)); - - } /* butterfly loop end */ - - } /* groups loop end */ - - twidCoefModifier = twidCoefModifier << 1U; - } /* stages loop end */ - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - /* loop for groups */ - for (j = 0; j < n2; j++) - { - coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U)); - - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - for (i = j; i < fftLen; i += n1) - { - l = i + n2; - - T = read_q15x2 (pSrc + (2 * i)); - - S = read_q15x2 (pSrc + (2 * l)); - - R = __QSUB16(T, S); - - write_q15x2 (pSrc + (2 * i), __QADD16(T, S)); - - write_q15x2 (pSrc + (2 * l), R); - - } /* butterfly loop end */ - - } /* groups loop end */ - - twidCoefModifier = twidCoefModifier << 1U; - -#else /* #if defined (ARM_MATH_DSP) */ - - uint32_t i, j, k, l; - uint32_t n1, n2, ia; - q15_t xt, yt, cosVal, sinVal; - - // N = fftLen; - n2 = fftLen; - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - /* loop for groups */ - for (j = 0; j < n2; j++) - { - cosVal = pCoef[(ia * 2)]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - for (i = j; i < fftLen; i += n1) - { - l = i + n2; - xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U); - pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U; - - yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U); - pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) + - (pSrc[2 * i + 1] >> 1U) ) >> 1U; - - pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) - - ((int16_t) (((q31_t) yt * sinVal) >> 16))); - - pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) + - ((int16_t) (((q31_t) xt * sinVal) >> 16))); - - } /* butterfly loop end */ - - } /* groups loop end */ - - twidCoefModifier = twidCoefModifier << 1U; - - /* loop for stage */ - for (k = fftLen / 2; k > 2; k = k >> 1) - { - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - /* loop for groups */ - for (j = 0; j < n2; j++) - { - cosVal = pCoef[(ia * 2)]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - for (i = j; i < fftLen; i += n1) - { - l = i + n2; - xt = pSrc[2 * i] - pSrc[2 * l]; - pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U; - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U; - - pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) - - ((int16_t) (((q31_t) yt * sinVal) >> 16)) ); - - pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) + - ((int16_t) (((q31_t) xt * sinVal) >> 16)) ); - - } /* butterfly loop end */ - - } /* groups loop end */ - - twidCoefModifier = twidCoefModifier << 1U; - } /* stages loop end */ - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - cosVal = pCoef[(ia * 2)]; - sinVal = pCoef[(ia * 2) + 1]; - - ia = ia + twidCoefModifier; - - /* loop for butterfly */ - for (i = 0; i < fftLen; i += n1) - { - l = i + n2; - xt = pSrc[2 * i] - pSrc[2 * l]; - pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]); - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]); - - pSrc[2 * l] = xt; - - pSrc[2 * l + 1] = yt; - - } /* groups loop end */ - - -#endif /* #if defined (ARM_MATH_DSP) */ - -} +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix2_q15.c
+ * Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+void arm_radix2_butterfly_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pCoef,
+ uint16_t twidCoefModifier);
+
+void arm_radix2_butterfly_inverse_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pCoef,
+ uint16_t twidCoefModifier);
+
+void arm_bitreversal_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ uint16_t bitRevFactor,
+ const uint16_t * pBitRevTab);
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Processing function for the fixed-point CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
+ @param[in] S points to an instance of the fixed-point CFFT/CIFFT structure
+ @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+ @return none
+ */
+
+void arm_cfft_radix2_q15(
+ const arm_cfft_radix2_instance_q15 * S,
+ q15_t * pSrc)
+{
+
+ if (S->ifftFlag == 1U)
+ {
+ arm_radix2_butterfly_inverse_q15 (pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
+ }
+ else
+ {
+ arm_radix2_butterfly_q15 (pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
+ }
+
+ arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+}
+
+/**
+ @} end of ComplexFFT group
+ */
+
+void arm_radix2_butterfly_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pCoef,
+ uint16_t twidCoefModifier)
+{
+#if defined (ARM_MATH_DSP)
+
+ uint32_t i, j, k, l;
+ uint32_t n1, n2, ia;
+ q15_t in;
+ q31_t T, S, R;
+ q31_t coeff, out1, out2;
+
+ //N = fftLen;
+ n2 = fftLen;
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ // loop for groups
+ for (i = 0; i < n2; i++)
+ {
+ coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
+
+ ia = ia + twidCoefModifier;
+
+ l = i + n2;
+
+ T = read_q15x2 (pSrc + (2 * i));
+ in = ((int16_t) (T & 0xFFFF)) >> 1;
+ T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
+
+ S = read_q15x2 (pSrc + (2 * l));
+ in = ((int16_t) (S & 0xFFFF)) >> 1;
+ S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUAD(coeff, R) >> 16;
+ out2 = __SMUSDX(coeff, R);
+#else
+ out1 = __SMUSDX(R, coeff) >> 16U;
+ out2 = __SMUAD(coeff, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
+
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ i++;
+ l++;
+
+ T = read_q15x2 (pSrc + (2 * i));
+ in = ((int16_t) (T & 0xFFFF)) >> 1;
+ T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
+
+ S = read_q15x2 (pSrc + (2 * l));
+ in = ((int16_t) (S & 0xFFFF)) >> 1;
+ S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUAD(coeff, R) >> 16;
+ out2 = __SMUSDX(coeff, R);
+#else
+
+ out1 = __SMUSDX(R, coeff) >> 16U;
+ out2 = __SMUAD(coeff, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ } /* groups loop end */
+
+ twidCoefModifier = twidCoefModifier << 1U;
+
+ /* loop for stage */
+ for (k = fftLen / 2; k > 2; k = k >> 1)
+ {
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ /* loop for groups */
+ for (j = 0; j < n2; j++)
+ {
+ coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
+
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ for (i = j; i < fftLen; i += n1)
+ {
+ l = i + n2;
+
+ T = read_q15x2 (pSrc + (2 * i));
+
+ S = read_q15x2 (pSrc + (2 * l));
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUAD(coeff, R) >> 16;
+ out2 = __SMUSDX(coeff, R);
+#else
+ out1 = __SMUSDX(R, coeff) >> 16U;
+ out2 = __SMUAD(coeff, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ i += n1;
+
+ l = i + n2;
+
+ T = read_q15x2 (pSrc + (2 * i));
+
+ S = read_q15x2 (pSrc + (2 * l));
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUAD(coeff, R) >> 16;
+ out2 = __SMUSDX(coeff, R);
+#else
+ out1 = __SMUSDX(R, coeff) >> 16U;
+ out2 = __SMUAD(coeff, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ } /* butterfly loop end */
+
+ } /* groups loop end */
+
+ twidCoefModifier = twidCoefModifier << 1U;
+ } /* stages loop end */
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
+
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ for (i = 0; i < fftLen; i += n1)
+ {
+ l = i + n2;
+
+ T = read_q15x2 (pSrc + (2 * i));
+
+ S = read_q15x2 (pSrc + (2 * l));
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __QADD16(T, S));
+
+ write_q15x2 (pSrc + (2 * l), R);
+
+ i += n1;
+ l = i + n2;
+
+ T = read_q15x2 (pSrc + (2 * i));
+
+ S = read_q15x2 (pSrc + (2 * l));
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __QADD16(T, S));
+
+ write_q15x2 (pSrc + (2 * l), R);
+
+ } /* groups loop end */
+
+
+#else /* #if defined (ARM_MATH_DSP) */
+
+ uint32_t i, j, k, l;
+ uint32_t n1, n2, ia;
+ q15_t xt, yt, cosVal, sinVal;
+
+
+ // N = fftLen;
+ n2 = fftLen;
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ /* loop for groups */
+ for (j = 0; j < n2; j++)
+ {
+ cosVal = pCoef[(ia * 2)];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ for (i = j; i < fftLen; i += n1)
+ {
+ l = i + n2;
+ xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
+ pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
+
+ yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
+ pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) +
+ (pSrc[2 * i + 1] >> 1U) ) >> 1U;
+
+ pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
+ ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+
+ pSrc[2U * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
+ ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+
+ } /* butterfly loop end */
+
+ } /* groups loop end */
+
+ twidCoefModifier = twidCoefModifier << 1U;
+
+ /* loop for stage */
+ for (k = fftLen / 2; k > 2; k = k >> 1)
+ {
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ /* loop for groups */
+ for (j = 0; j < n2; j++)
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ for (i = j; i < fftLen; i += n1)
+ {
+ l = i + n2;
+ xt = pSrc[2 * i] - pSrc[2 * l];
+ pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
+
+ pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
+ ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+
+ pSrc[2U * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
+ ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+
+ } /* butterfly loop end */
+
+ } /* groups loop end */
+
+ twidCoefModifier = twidCoefModifier << 1U;
+ } /* stages loop end */
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ /* loop for groups */
+ for (j = 0; j < n2; j++)
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ for (i = j; i < fftLen; i += n1)
+ {
+ l = i + n2;
+ xt = pSrc[2 * i] - pSrc[2 * l];
+ pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+ pSrc[2 * l] = xt;
+
+ pSrc[2 * l + 1] = yt;
+
+ } /* butterfly loop end */
+
+ } /* groups loop end */
+
+ twidCoefModifier = twidCoefModifier << 1U;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+
+void arm_radix2_butterfly_inverse_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pCoef,
+ uint16_t twidCoefModifier)
+{
+#if defined (ARM_MATH_DSP)
+
+ uint32_t i, j, k, l;
+ uint32_t n1, n2, ia;
+ q15_t in;
+ q31_t T, S, R;
+ q31_t coeff, out1, out2;
+
+ // N = fftLen;
+ n2 = fftLen;
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ /* loop for groups */
+ for (i = 0; i < n2; i++)
+ {
+ coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
+
+ ia = ia + twidCoefModifier;
+
+ l = i + n2;
+
+ T = read_q15x2 (pSrc + (2 * i));
+ in = ((int16_t) (T & 0xFFFF)) >> 1;
+ T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
+
+ S = read_q15x2 (pSrc + (2 * l));
+ in = ((int16_t) (S & 0xFFFF)) >> 1;
+ S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUSD(coeff, R) >> 16;
+ out2 = __SMUADX(coeff, R);
+#else
+ out1 = __SMUADX(R, coeff) >> 16U;
+ out2 = __SMUSD(__QSUB(0, coeff), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
+
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ i++;
+ l++;
+
+ T = read_q15x2 (pSrc + (2 * i));
+ in = ((int16_t) (T & 0xFFFF)) >> 1;
+ T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
+
+ S = read_q15x2 (pSrc + (2 * l));
+ in = ((int16_t) (S & 0xFFFF)) >> 1;
+ S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUSD(coeff, R) >> 16;
+ out2 = __SMUADX(coeff, R);
+#else
+ out1 = __SMUADX(R, coeff) >> 16U;
+ out2 = __SMUSD(__QSUB(0, coeff), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ } /* groups loop end */
+
+ twidCoefModifier = twidCoefModifier << 1U;
+
+ /* loop for stage */
+ for (k = fftLen / 2; k > 2; k = k >> 1)
+ {
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ /* loop for groups */
+ for (j = 0; j < n2; j++)
+ {
+ coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
+
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ for (i = j; i < fftLen; i += n1)
+ {
+ l = i + n2;
+
+ T = read_q15x2 (pSrc + (2 * i));
+
+ S = read_q15x2 (pSrc + (2 * l));
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUSD(coeff, R) >> 16;
+ out2 = __SMUADX(coeff, R);
+#else
+ out1 = __SMUADX(R, coeff) >> 16U;
+ out2 = __SMUSD(__QSUB(0, coeff), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ i += n1;
+
+ l = i + n2;
+
+ T = read_q15x2 (pSrc + (2 * i));
+
+ S = read_q15x2 (pSrc + (2 * l));
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUSD(coeff, R) >> 16;
+ out2 = __SMUADX(coeff, R);
+#else
+ out1 = __SMUADX(R, coeff) >> 16U;
+ out2 = __SMUSD(__QSUB(0, coeff), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ } /* butterfly loop end */
+
+ } /* groups loop end */
+
+ twidCoefModifier = twidCoefModifier << 1U;
+ } /* stages loop end */
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ /* loop for groups */
+ for (j = 0; j < n2; j++)
+ {
+ coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
+
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ for (i = j; i < fftLen; i += n1)
+ {
+ l = i + n2;
+
+ T = read_q15x2 (pSrc + (2 * i));
+
+ S = read_q15x2 (pSrc + (2 * l));
+
+ R = __QSUB16(T, S);
+
+ write_q15x2 (pSrc + (2 * i), __QADD16(T, S));
+
+ write_q15x2 (pSrc + (2 * l), R);
+
+ } /* butterfly loop end */
+
+ } /* groups loop end */
+
+ twidCoefModifier = twidCoefModifier << 1U;
+
+#else /* #if defined (ARM_MATH_DSP) */
+
+ uint32_t i, j, k, l;
+ uint32_t n1, n2, ia;
+ q15_t xt, yt, cosVal, sinVal;
+
+ // N = fftLen;
+ n2 = fftLen;
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ /* loop for groups */
+ for (j = 0; j < n2; j++)
+ {
+ cosVal = pCoef[(ia * 2)];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ for (i = j; i < fftLen; i += n1)
+ {
+ l = i + n2;
+ xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
+ pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
+
+ yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
+ pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) +
+ (pSrc[2 * i + 1] >> 1U) ) >> 1U;
+
+ pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
+ ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+
+ pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
+ ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+
+ } /* butterfly loop end */
+
+ } /* groups loop end */
+
+ twidCoefModifier = twidCoefModifier << 1U;
+
+ /* loop for stage */
+ for (k = fftLen / 2; k > 2; k = k >> 1)
+ {
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ /* loop for groups */
+ for (j = 0; j < n2; j++)
+ {
+ cosVal = pCoef[(ia * 2)];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ for (i = j; i < fftLen; i += n1)
+ {
+ l = i + n2;
+ xt = pSrc[2 * i] - pSrc[2 * l];
+ pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
+
+ pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
+ ((int16_t) (((q31_t) yt * sinVal) >> 16)) );
+
+ pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
+ ((int16_t) (((q31_t) xt * sinVal) >> 16)) );
+
+ } /* butterfly loop end */
+
+ } /* groups loop end */
+
+ twidCoefModifier = twidCoefModifier << 1U;
+ } /* stages loop end */
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ cosVal = pCoef[(ia * 2)];
+ sinVal = pCoef[(ia * 2) + 1];
+
+ ia = ia + twidCoefModifier;
+
+ /* loop for butterfly */
+ for (i = 0; i < fftLen; i += n1)
+ {
+ l = i + n2;
+ xt = pSrc[2 * i] - pSrc[2 * l];
+ pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+ pSrc[2 * l] = xt;
+
+ pSrc[2 * l + 1] = yt;
+
+ } /* groups loop end */
+
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q31.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q31.c index 996e91d..27f1408 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q31.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix2_q31.c @@ -1,337 +1,337 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix2_q31.c - * Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -void arm_radix2_butterfly_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef, - uint16_t twidCoefModifier); - -void arm_radix2_butterfly_inverse_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef, - uint16_t twidCoefModifier); - -void arm_bitreversal_q31( - q31_t * pSrc, - uint32_t fftLen, - uint16_t bitRevFactor, - const uint16_t * pBitRevTab); - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Processing function for the fixed-point CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future. - @param[in] S points to an instance of the fixed-point CFFT/CIFFT structure - @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @return none - */ - -void arm_cfft_radix2_q31( - const arm_cfft_radix2_instance_q31 * S, - q31_t * pSrc) -{ - - if (S->ifftFlag == 1U) - { - arm_radix2_butterfly_inverse_q31(pSrc, S->fftLen, - S->pTwiddle, S->twidCoefModifier); - } - else - { - arm_radix2_butterfly_q31(pSrc, S->fftLen, - S->pTwiddle, S->twidCoefModifier); - } - - arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); -} - -/** - @} end of ComplexFFT group - */ - -void arm_radix2_butterfly_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef, - uint16_t twidCoefModifier) -{ - - unsigned i, j, k, l, m; - unsigned n1, n2, ia; - q31_t xt, yt, cosVal, sinVal; - q31_t p0, p1; - - //N = fftLen; - n2 = fftLen; - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - // loop for groups - for (i = 0; i < n2; i++) - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - l = i + n2; - xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U); - pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U; - - yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U); - pSrc[2 * i + 1] = - ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U; - - mult_32x32_keep32_R(p0, xt, cosVal); - mult_32x32_keep32_R(p1, yt, cosVal); - multAcc_32x32_keep32_R(p0, yt, sinVal); - multSub_32x32_keep32_R(p1, xt, sinVal); - - pSrc[2U * l] = p0; - pSrc[2U * l + 1U] = p1; - - } // groups loop end - - twidCoefModifier <<= 1U; - - // loop for stage - for (k = fftLen / 2; k > 2; k = k >> 1) - { - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - // loop for groups - for (j = 0; j < n2; j++) - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - // loop for butterfly - i = j; - m = fftLen / n1; - do - { - l = i + n2; - xt = pSrc[2 * i] - pSrc[2 * l]; - pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U; - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U; - - mult_32x32_keep32_R(p0, xt, cosVal); - mult_32x32_keep32_R(p1, yt, cosVal); - multAcc_32x32_keep32_R(p0, yt, sinVal); - multSub_32x32_keep32_R(p1, xt, sinVal); - - pSrc[2U * l] = p0; - pSrc[2U * l + 1U] = p1; - i += n1; - m--; - } while ( m > 0); // butterfly loop end - - } // groups loop end - - twidCoefModifier <<= 1U; - } // stages loop end - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - // loop for butterfly - for (i = 0; i < fftLen; i += n1) - { - l = i + n2; - xt = pSrc[2 * i] - pSrc[2 * l]; - pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]); - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]); - - pSrc[2U * l] = xt; - - pSrc[2U * l + 1U] = yt; - - i += n1; - l = i + n2; - - xt = pSrc[2 * i] - pSrc[2 * l]; - pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]); - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]); - - pSrc[2U * l] = xt; - - pSrc[2U * l + 1U] = yt; - - } // butterfly loop end - -} - - -void arm_radix2_butterfly_inverse_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef, - uint16_t twidCoefModifier) -{ - - unsigned i, j, k, l; - unsigned n1, n2, ia; - q31_t xt, yt, cosVal, sinVal; - q31_t p0, p1; - - //N = fftLen; - n2 = fftLen; - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - // loop for groups - for (i = 0; i < n2; i++) - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - l = i + n2; - xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U); - pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U; - - yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U); - pSrc[2 * i + 1] = - ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U; - - mult_32x32_keep32_R(p0, xt, cosVal); - mult_32x32_keep32_R(p1, yt, cosVal); - multSub_32x32_keep32_R(p0, yt, sinVal); - multAcc_32x32_keep32_R(p1, xt, sinVal); - - pSrc[2U * l] = p0; - pSrc[2U * l + 1U] = p1; - } // groups loop end - - twidCoefModifier = twidCoefModifier << 1U; - - // loop for stage - for (k = fftLen / 2; k > 2; k = k >> 1) - { - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - // loop for groups - for (j = 0; j < n2; j++) - { - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - // loop for butterfly - for (i = j; i < fftLen; i += n1) - { - l = i + n2; - xt = pSrc[2 * i] - pSrc[2 * l]; - pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U; - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U; - - mult_32x32_keep32_R(p0, xt, cosVal); - mult_32x32_keep32_R(p1, yt, cosVal); - multSub_32x32_keep32_R(p0, yt, sinVal); - multAcc_32x32_keep32_R(p1, xt, sinVal); - - pSrc[2U * l] = p0; - pSrc[2U * l + 1U] = p1; - } // butterfly loop end - - } // groups loop end - - twidCoefModifier = twidCoefModifier << 1U; - } // stages loop end - - n1 = n2; - n2 = n2 >> 1; - ia = 0; - - cosVal = pCoef[ia * 2]; - sinVal = pCoef[(ia * 2) + 1]; - ia = ia + twidCoefModifier; - - // loop for butterfly - for (i = 0; i < fftLen; i += n1) - { - l = i + n2; - xt = pSrc[2 * i] - pSrc[2 * l]; - pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]); - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]); - - pSrc[2U * l] = xt; - - pSrc[2U * l + 1U] = yt; - - i += n1; - l = i + n2; - - xt = pSrc[2 * i] - pSrc[2 * l]; - pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]); - - yt = pSrc[2 * i + 1] - pSrc[2 * l + 1]; - pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]); - - pSrc[2U * l] = xt; - - pSrc[2U * l + 1U] = yt; - - } // butterfly loop end - -} +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix2_q31.c
+ * Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+void arm_radix2_butterfly_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef,
+ uint16_t twidCoefModifier);
+
+void arm_radix2_butterfly_inverse_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef,
+ uint16_t twidCoefModifier);
+
+void arm_bitreversal_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ uint16_t bitRevFactor,
+ const uint16_t * pBitRevTab);
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Processing function for the fixed-point CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
+ @param[in] S points to an instance of the fixed-point CFFT/CIFFT structure
+ @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+ @return none
+ */
+
+void arm_cfft_radix2_q31(
+ const arm_cfft_radix2_instance_q31 * S,
+ q31_t * pSrc)
+{
+
+ if (S->ifftFlag == 1U)
+ {
+ arm_radix2_butterfly_inverse_q31(pSrc, S->fftLen,
+ S->pTwiddle, S->twidCoefModifier);
+ }
+ else
+ {
+ arm_radix2_butterfly_q31(pSrc, S->fftLen,
+ S->pTwiddle, S->twidCoefModifier);
+ }
+
+ arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+}
+
+/**
+ @} end of ComplexFFT group
+ */
+
+void arm_radix2_butterfly_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef,
+ uint16_t twidCoefModifier)
+{
+
+ unsigned i, j, k, l, m;
+ unsigned n1, n2, ia;
+ q31_t xt, yt, cosVal, sinVal;
+ q31_t p0, p1;
+
+ //N = fftLen;
+ n2 = fftLen;
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ // loop for groups
+ for (i = 0; i < n2; i++)
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ l = i + n2;
+ xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
+ pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
+
+ yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
+ pSrc[2 * i + 1] =
+ ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
+
+ mult_32x32_keep32_R(p0, xt, cosVal);
+ mult_32x32_keep32_R(p1, yt, cosVal);
+ multAcc_32x32_keep32_R(p0, yt, sinVal);
+ multSub_32x32_keep32_R(p1, xt, sinVal);
+
+ pSrc[2U * l] = p0;
+ pSrc[2U * l + 1U] = p1;
+
+ } // groups loop end
+
+ twidCoefModifier <<= 1U;
+
+ // loop for stage
+ for (k = fftLen / 2; k > 2; k = k >> 1)
+ {
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ // loop for groups
+ for (j = 0; j < n2; j++)
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ // loop for butterfly
+ i = j;
+ m = fftLen / n1;
+ do
+ {
+ l = i + n2;
+ xt = pSrc[2 * i] - pSrc[2 * l];
+ pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
+
+ mult_32x32_keep32_R(p0, xt, cosVal);
+ mult_32x32_keep32_R(p1, yt, cosVal);
+ multAcc_32x32_keep32_R(p0, yt, sinVal);
+ multSub_32x32_keep32_R(p1, xt, sinVal);
+
+ pSrc[2U * l] = p0;
+ pSrc[2U * l + 1U] = p1;
+ i += n1;
+ m--;
+ } while ( m > 0); // butterfly loop end
+
+ } // groups loop end
+
+ twidCoefModifier <<= 1U;
+ } // stages loop end
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ // loop for butterfly
+ for (i = 0; i < fftLen; i += n1)
+ {
+ l = i + n2;
+ xt = pSrc[2 * i] - pSrc[2 * l];
+ pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+ pSrc[2U * l] = xt;
+
+ pSrc[2U * l + 1U] = yt;
+
+ i += n1;
+ l = i + n2;
+
+ xt = pSrc[2 * i] - pSrc[2 * l];
+ pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+ pSrc[2U * l] = xt;
+
+ pSrc[2U * l + 1U] = yt;
+
+ } // butterfly loop end
+
+}
+
+
+void arm_radix2_butterfly_inverse_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef,
+ uint16_t twidCoefModifier)
+{
+
+ unsigned i, j, k, l;
+ unsigned n1, n2, ia;
+ q31_t xt, yt, cosVal, sinVal;
+ q31_t p0, p1;
+
+ //N = fftLen;
+ n2 = fftLen;
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ // loop for groups
+ for (i = 0; i < n2; i++)
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ l = i + n2;
+ xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
+ pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
+
+ yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
+ pSrc[2 * i + 1] =
+ ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
+
+ mult_32x32_keep32_R(p0, xt, cosVal);
+ mult_32x32_keep32_R(p1, yt, cosVal);
+ multSub_32x32_keep32_R(p0, yt, sinVal);
+ multAcc_32x32_keep32_R(p1, xt, sinVal);
+
+ pSrc[2U * l] = p0;
+ pSrc[2U * l + 1U] = p1;
+ } // groups loop end
+
+ twidCoefModifier = twidCoefModifier << 1U;
+
+ // loop for stage
+ for (k = fftLen / 2; k > 2; k = k >> 1)
+ {
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ // loop for groups
+ for (j = 0; j < n2; j++)
+ {
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ // loop for butterfly
+ for (i = j; i < fftLen; i += n1)
+ {
+ l = i + n2;
+ xt = pSrc[2 * i] - pSrc[2 * l];
+ pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
+
+ mult_32x32_keep32_R(p0, xt, cosVal);
+ mult_32x32_keep32_R(p1, yt, cosVal);
+ multSub_32x32_keep32_R(p0, yt, sinVal);
+ multAcc_32x32_keep32_R(p1, xt, sinVal);
+
+ pSrc[2U * l] = p0;
+ pSrc[2U * l + 1U] = p1;
+ } // butterfly loop end
+
+ } // groups loop end
+
+ twidCoefModifier = twidCoefModifier << 1U;
+ } // stages loop end
+
+ n1 = n2;
+ n2 = n2 >> 1;
+ ia = 0;
+
+ cosVal = pCoef[ia * 2];
+ sinVal = pCoef[(ia * 2) + 1];
+ ia = ia + twidCoefModifier;
+
+ // loop for butterfly
+ for (i = 0; i < fftLen; i += n1)
+ {
+ l = i + n2;
+ xt = pSrc[2 * i] - pSrc[2 * l];
+ pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+ pSrc[2U * l] = xt;
+
+ pSrc[2U * l + 1U] = yt;
+
+ i += n1;
+ l = i + n2;
+
+ xt = pSrc[2 * i] - pSrc[2 * l];
+ pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+ yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+ pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+ pSrc[2U * l] = xt;
+
+ pSrc[2U * l + 1U] = yt;
+
+ } // butterfly loop end
+
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c index 9d9d4d5..675a303 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c @@ -1,1203 +1,1200 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix4_f32.c - * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -extern void arm_bitreversal_f32( - float32_t * pSrc, - uint16_t fftSize, - uint16_t bitRevFactor, - const uint16_t * pBitRevTab); - -void arm_radix4_butterfly_f32( - float32_t * pSrc, - uint16_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier); - -void arm_radix4_butterfly_inverse_f32( - float32_t * pSrc, - uint16_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier, - float32_t onebyfftLen); - - - - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - - -/** - @brief Processing function for the floating-point Radix-4 CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future. - @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure - @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @return none - */ - -void arm_cfft_radix4_f32( - const arm_cfft_radix4_instance_f32 * S, - float32_t * pSrc) -{ - if (S->ifftFlag == 1U) - { - /* Complex IFFT radix-4 */ - arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen); - } - else - { - /* Complex FFT radix-4 */ - arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); - } - - if (S->bitReverseFlag == 1U) - { - /* Bit Reversal */ - arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); - } - -} - -/** - @} end of ComplexFFT group - */ - -/* ---------------------------------------------------------------------- - * Internal helper function used by the FFTs - * ---------------------------------------------------------------------- */ - -/** - brief Core function for the floating-point CFFT butterfly process. - param[in,out] pSrc points to the in-place buffer of floating-point data type - param[in] fftLen length of the FFT - param[in] pCoef points to the twiddle coefficient buffer - param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table - return none - */ - -void arm_radix4_butterfly_f32( - float32_t * pSrc, - uint16_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier) -{ - float32_t co1, co2, co3, si1, si2, si3; - uint32_t ia1, ia2, ia3; - uint32_t i0, i1, i2, i3; - uint32_t n1, n2, j, k; - -#if defined (ARM_MATH_LOOPUNROLL) - - float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn; - float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc, - Ybminusd; - float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out; - float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out; - float32_t *ptr1; - float32_t p0,p1,p2,p3,p4,p5; - float32_t a0,a1,a2,a3,a4,a5,a6,a7; - - /* Initializations for the first stage */ - n2 = fftLen; - n1 = n2; - - /* n2 = fftLen/4 */ - n2 >>= 2U; - i0 = 0U; - ia1 = 0U; - - j = n2; - - /* Calculation of first stage */ - do - { - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - xaIn = pSrc[(2U * i0)]; - yaIn = pSrc[(2U * i0) + 1U]; - - xbIn = pSrc[(2U * i1)]; - ybIn = pSrc[(2U * i1) + 1U]; - - xcIn = pSrc[(2U * i2)]; - ycIn = pSrc[(2U * i2) + 1U]; - - xdIn = pSrc[(2U * i3)]; - ydIn = pSrc[(2U * i3) + 1U]; - - /* xa + xc */ - Xaplusc = xaIn + xcIn; - /* xb + xd */ - Xbplusd = xbIn + xdIn; - /* ya + yc */ - Yaplusc = yaIn + ycIn; - /* yb + yd */ - Ybplusd = ybIn + ydIn; - - /* index calculation for the coefficients */ - ia2 = ia1 + ia1; - co2 = pCoef[ia2 * 2U]; - si2 = pCoef[(ia2 * 2U) + 1U]; - - /* xa - xc */ - Xaminusc = xaIn - xcIn; - /* xb - xd */ - Xbminusd = xbIn - xdIn; - /* ya - yc */ - Yaminusc = yaIn - ycIn; - /* yb - yd */ - Ybminusd = ybIn - ydIn; - - /* xa' = xa + xb + xc + xd */ - pSrc[(2U * i0)] = Xaplusc + Xbplusd; - /* ya' = ya + yb + yc + yd */ - pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd; - - /* (xa - xc) + (yb - yd) */ - Xb12C_out = (Xaminusc + Ybminusd); - /* (ya - yc) + (xb - xd) */ - Yb12C_out = (Yaminusc - Xbminusd); - /* (xa + xc) - (xb + xd) */ - Xc12C_out = (Xaplusc - Xbplusd); - /* (ya + yc) - (yb + yd) */ - Yc12C_out = (Yaplusc - Ybplusd); - /* (xa - xc) - (yb - yd) */ - Xd12C_out = (Xaminusc - Ybminusd); - /* (ya - yc) + (xb - xd) */ - Yd12C_out = (Xbminusd + Yaminusc); - - co1 = pCoef[ia1 * 2U]; - si1 = pCoef[(ia1 * 2U) + 1U]; - - /* index calculation for the coefficients */ - ia3 = ia2 + ia1; - co3 = pCoef[ia3 * 2U]; - si3 = pCoef[(ia3 * 2U) + 1U]; - - Xb12_out = Xb12C_out * co1; - Yb12_out = Yb12C_out * co1; - Xc12_out = Xc12C_out * co2; - Yc12_out = Yc12C_out * co2; - Xd12_out = Xd12C_out * co3; - Yd12_out = Yd12C_out * co3; - - /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ - //Xb12_out -= Yb12C_out * si1; - p0 = Yb12C_out * si1; - /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ - //Yb12_out += Xb12C_out * si1; - p1 = Xb12C_out * si1; - /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ - //Xc12_out -= Yc12C_out * si2; - p2 = Yc12C_out * si2; - /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ - //Yc12_out += Xc12C_out * si2; - p3 = Xc12C_out * si2; - /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ - //Xd12_out -= Yd12C_out * si3; - p4 = Yd12C_out * si3; - /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ - //Yd12_out += Xd12C_out * si3; - p5 = Xd12C_out * si3; - - Xb12_out += p0; - Yb12_out -= p1; - Xc12_out += p2; - Yc12_out -= p3; - Xd12_out += p4; - Yd12_out -= p5; - - /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = Xc12_out; - - /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ - pSrc[(2U * i1) + 1U] = Yc12_out; - - /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = Xb12_out; - - /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = Yb12_out; - - /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ - pSrc[2U * i3] = Xd12_out; - - /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = Yd12_out; - - /* Twiddle coefficients index modifier */ - ia1 += twidCoefModifier; - - /* Updating input index */ - i0++; - - } - while (--j); - - twidCoefModifier <<= 2U; - - /* Calculation of second stage to excluding last stage */ - for (k = fftLen >> 2U; k > 4U; k >>= 2U) - { - /* Initializations for the first stage */ - n1 = n2; - n2 >>= 2U; - ia1 = 0U; - - /* Calculation of first stage */ - j = 0; - do - { - /* index calculation for the coefficients */ - ia2 = ia1 + ia1; - ia3 = ia2 + ia1; - co1 = pCoef[(ia1 * 2U)]; - si1 = pCoef[(ia1 * 2U) + 1U]; - co2 = pCoef[(ia2 * 2U)]; - si2 = pCoef[(ia2 * 2U) + 1U]; - co3 = pCoef[(ia3 * 2U)]; - si3 = pCoef[(ia3 * 2U) + 1U]; - - /* Twiddle coefficients index modifier */ - ia1 += twidCoefModifier; - - i0 = j; - do - { - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - xaIn = pSrc[(2U * i0)]; - yaIn = pSrc[(2U * i0) + 1U]; - - xbIn = pSrc[(2U * i1)]; - ybIn = pSrc[(2U * i1) + 1U]; - - xcIn = pSrc[(2U * i2)]; - ycIn = pSrc[(2U * i2) + 1U]; - - xdIn = pSrc[(2U * i3)]; - ydIn = pSrc[(2U * i3) + 1U]; - - /* xa - xc */ - Xaminusc = xaIn - xcIn; - /* (xb - xd) */ - Xbminusd = xbIn - xdIn; - /* ya - yc */ - Yaminusc = yaIn - ycIn; - /* (yb - yd) */ - Ybminusd = ybIn - ydIn; - - /* xa + xc */ - Xaplusc = xaIn + xcIn; - /* xb + xd */ - Xbplusd = xbIn + xdIn; - /* ya + yc */ - Yaplusc = yaIn + ycIn; - /* yb + yd */ - Ybplusd = ybIn + ydIn; - - /* (xa - xc) + (yb - yd) */ - Xb12C_out = (Xaminusc + Ybminusd); - /* (ya - yc) - (xb - xd) */ - Yb12C_out = (Yaminusc - Xbminusd); - /* xa + xc -(xb + xd) */ - Xc12C_out = (Xaplusc - Xbplusd); - /* (ya + yc) - (yb + yd) */ - Yc12C_out = (Yaplusc - Ybplusd); - /* (xa - xc) - (yb - yd) */ - Xd12C_out = (Xaminusc - Ybminusd); - /* (ya - yc) + (xb - xd) */ - Yd12C_out = (Xbminusd + Yaminusc); - - pSrc[(2U * i0)] = Xaplusc + Xbplusd; - pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd; - - Xb12_out = Xb12C_out * co1; - Yb12_out = Yb12C_out * co1; - Xc12_out = Xc12C_out * co2; - Yc12_out = Yc12C_out * co2; - Xd12_out = Xd12C_out * co3; - Yd12_out = Yd12C_out * co3; - - /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ - //Xb12_out -= Yb12C_out * si1; - p0 = Yb12C_out * si1; - /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ - //Yb12_out += Xb12C_out * si1; - p1 = Xb12C_out * si1; - /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ - //Xc12_out -= Yc12C_out * si2; - p2 = Yc12C_out * si2; - /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ - //Yc12_out += Xc12C_out * si2; - p3 = Xc12C_out * si2; - /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ - //Xd12_out -= Yd12C_out * si3; - p4 = Yd12C_out * si3; - /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ - //Yd12_out += Xd12C_out * si3; - p5 = Xd12C_out * si3; - - Xb12_out += p0; - Yb12_out -= p1; - Xc12_out += p2; - Yc12_out -= p3; - Xd12_out += p4; - Yd12_out -= p5; - - /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = Xc12_out; - - /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ - pSrc[(2U * i1) + 1U] = Yc12_out; - - /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = Xb12_out; - - /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = Yb12_out; - - /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ - pSrc[2U * i3] = Xd12_out; - - /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = Yd12_out; - - i0 += n1; - } while (i0 < fftLen); - j++; - } while (j <= (n2 - 1U)); - twidCoefModifier <<= 2U; - } - - j = fftLen >> 2; - ptr1 = &pSrc[0]; - - /* Calculations of last stage */ - do - { - xaIn = ptr1[0]; - yaIn = ptr1[1]; - xbIn = ptr1[2]; - ybIn = ptr1[3]; - xcIn = ptr1[4]; - ycIn = ptr1[5]; - xdIn = ptr1[6]; - ydIn = ptr1[7]; - - /* xa + xc */ - Xaplusc = xaIn + xcIn; - - /* xa - xc */ - Xaminusc = xaIn - xcIn; - - /* ya + yc */ - Yaplusc = yaIn + ycIn; - - /* ya - yc */ - Yaminusc = yaIn - ycIn; - - /* xb + xd */ - Xbplusd = xbIn + xdIn; - - /* yb + yd */ - Ybplusd = ybIn + ydIn; - - /* (xb-xd) */ - Xbminusd = xbIn - xdIn; - - /* (yb-yd) */ - Ybminusd = ybIn - ydIn; - - /* xa' = xa + xb + xc + xd */ - a0 = (Xaplusc + Xbplusd); - /* ya' = ya + yb + yc + yd */ - a1 = (Yaplusc + Ybplusd); - /* xc' = (xa-xb+xc-xd) */ - a2 = (Xaplusc - Xbplusd); - /* yc' = (ya-yb+yc-yd) */ - a3 = (Yaplusc - Ybplusd); - /* xb' = (xa+yb-xc-yd) */ - a4 = (Xaminusc + Ybminusd); - /* yb' = (ya-xb-yc+xd) */ - a5 = (Yaminusc - Xbminusd); - /* xd' = (xa-yb-xc+yd)) */ - a6 = (Xaminusc - Ybminusd); - /* yd' = (ya+xb-yc-xd) */ - a7 = (Xbminusd + Yaminusc); - - ptr1[0] = a0; - ptr1[1] = a1; - ptr1[2] = a2; - ptr1[3] = a3; - ptr1[4] = a4; - ptr1[5] = a5; - ptr1[6] = a6; - ptr1[7] = a7; - - /* increment pointer by 8 */ - ptr1 += 8U; - } while (--j); - -#else - - float32_t t1, t2, r1, r2, s1, s2; - - /* Initializations for the fft calculation */ - n2 = fftLen; - n1 = n2; - for (k = fftLen; k > 1U; k >>= 2U) - { - /* Initializations for the fft calculation */ - n1 = n2; - n2 >>= 2U; - ia1 = 0U; - - /* FFT Calculation */ - j = 0; - do - { - /* index calculation for the coefficients */ - ia2 = ia1 + ia1; - ia3 = ia2 + ia1; - co1 = pCoef[ia1 * 2U]; - si1 = pCoef[(ia1 * 2U) + 1U]; - co2 = pCoef[ia2 * 2U]; - si2 = pCoef[(ia2 * 2U) + 1U]; - co3 = pCoef[ia3 * 2U]; - si3 = pCoef[(ia3 * 2U) + 1U]; - - /* Twiddle coefficients index modifier */ - ia1 = ia1 + twidCoefModifier; - - i0 = j; - do - { - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* xa + xc */ - r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)]; - - /* xa - xc */ - r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)]; - - /* ya + yc */ - s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U]; - - /* ya - yc */ - s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U]; - - /* xb + xd */ - t1 = pSrc[2U * i1] + pSrc[2U * i3]; - - /* xa' = xa + xb + xc + xd */ - pSrc[2U * i0] = r1 + t1; - - /* xa + xc -(xb + xd) */ - r1 = r1 - t1; - - /* yb + yd */ - t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U]; - - /* ya' = ya + yb + yc + yd */ - pSrc[(2U * i0) + 1U] = s1 + t2; - - /* (ya + yc) - (yb + yd) */ - s1 = s1 - t2; - - /* (yb - yd) */ - t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U]; - - /* (xb - xd) */ - t2 = pSrc[2U * i1] - pSrc[2U * i3]; - - /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = (r1 * co2) + (s1 * si2); - - /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ - pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2); - - /* (xa - xc) + (yb - yd) */ - r1 = r2 + t1; - - /* (xa - xc) - (yb - yd) */ - r2 = r2 - t1; - - /* (ya - yc) - (xb - xd) */ - s1 = s2 - t2; - - /* (ya - yc) + (xb - xd) */ - s2 = s2 + t2; - - /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = (r1 * co1) + (s1 * si1); - - /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1); - - /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ - pSrc[2U * i3] = (r2 * co3) + (s2 * si3); - - /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3); - - i0 += n1; - } while ( i0 < fftLen); - j++; - } while (j <= (n2 - 1U)); - twidCoefModifier <<= 2U; - } - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - -} - -/** - brief Core function for the floating-point CIFFT butterfly process. - param[in,out] pSrc points to the in-place buffer of floating-point data type - param[in] fftLen length of the FFT - param[in] pCoef points to twiddle coefficient buffer - param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. - param[in] onebyfftLen value of 1/fftLen - return none - */ - -void arm_radix4_butterfly_inverse_f32( - float32_t * pSrc, - uint16_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier, - float32_t onebyfftLen) -{ - float32_t co1, co2, co3, si1, si2, si3; - uint32_t ia1, ia2, ia3; - uint32_t i0, i1, i2, i3; - uint32_t n1, n2, j, k; - -#if defined (ARM_MATH_LOOPUNROLL) - - float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn; - float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc, - Ybminusd; - float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out; - float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out; - float32_t *ptr1; - float32_t p0,p1,p2,p3,p4,p5,p6,p7; - float32_t a0,a1,a2,a3,a4,a5,a6,a7; - - - /* Initializations for the first stage */ - n2 = fftLen; - n1 = n2; - - /* n2 = fftLen/4 */ - n2 >>= 2U; - i0 = 0U; - ia1 = 0U; - - j = n2; - - /* Calculation of first stage */ - do - { - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Butterfly implementation */ - xaIn = pSrc[(2U * i0)]; - yaIn = pSrc[(2U * i0) + 1U]; - - xcIn = pSrc[(2U * i2)]; - ycIn = pSrc[(2U * i2) + 1U]; - - xbIn = pSrc[(2U * i1)]; - ybIn = pSrc[(2U * i1) + 1U]; - - xdIn = pSrc[(2U * i3)]; - ydIn = pSrc[(2U * i3) + 1U]; - - /* xa + xc */ - Xaplusc = xaIn + xcIn; - /* xb + xd */ - Xbplusd = xbIn + xdIn; - /* ya + yc */ - Yaplusc = yaIn + ycIn; - /* yb + yd */ - Ybplusd = ybIn + ydIn; - - /* index calculation for the coefficients */ - ia2 = ia1 + ia1; - co2 = pCoef[ia2 * 2U]; - si2 = pCoef[(ia2 * 2U) + 1U]; - - /* xa - xc */ - Xaminusc = xaIn - xcIn; - /* xb - xd */ - Xbminusd = xbIn - xdIn; - /* ya - yc */ - Yaminusc = yaIn - ycIn; - /* yb - yd */ - Ybminusd = ybIn - ydIn; - - /* xa' = xa + xb + xc + xd */ - pSrc[(2U * i0)] = Xaplusc + Xbplusd; - - /* ya' = ya + yb + yc + yd */ - pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd; - - /* (xa - xc) - (yb - yd) */ - Xb12C_out = (Xaminusc - Ybminusd); - /* (ya - yc) + (xb - xd) */ - Yb12C_out = (Yaminusc + Xbminusd); - /* (xa + xc) - (xb + xd) */ - Xc12C_out = (Xaplusc - Xbplusd); - /* (ya + yc) - (yb + yd) */ - Yc12C_out = (Yaplusc - Ybplusd); - /* (xa - xc) + (yb - yd) */ - Xd12C_out = (Xaminusc + Ybminusd); - /* (ya - yc) - (xb - xd) */ - Yd12C_out = (Yaminusc - Xbminusd); - - co1 = pCoef[ia1 * 2U]; - si1 = pCoef[(ia1 * 2U) + 1U]; - - /* index calculation for the coefficients */ - ia3 = ia2 + ia1; - co3 = pCoef[ia3 * 2U]; - si3 = pCoef[(ia3 * 2U) + 1U]; - - Xb12_out = Xb12C_out * co1; - Yb12_out = Yb12C_out * co1; - Xc12_out = Xc12C_out * co2; - Yc12_out = Yc12C_out * co2; - Xd12_out = Xd12C_out * co3; - Yd12_out = Yd12C_out * co3; - - /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ - //Xb12_out -= Yb12C_out * si1; - p0 = Yb12C_out * si1; - /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ - //Yb12_out += Xb12C_out * si1; - p1 = Xb12C_out * si1; - /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ - //Xc12_out -= Yc12C_out * si2; - p2 = Yc12C_out * si2; - /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ - //Yc12_out += Xc12C_out * si2; - p3 = Xc12C_out * si2; - /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ - //Xd12_out -= Yd12C_out * si3; - p4 = Yd12C_out * si3; - /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ - //Yd12_out += Xd12C_out * si3; - p5 = Xd12C_out * si3; - - Xb12_out -= p0; - Yb12_out += p1; - Xc12_out -= p2; - Yc12_out += p3; - Xd12_out -= p4; - Yd12_out += p5; - - /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = Xc12_out; - - /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ - pSrc[(2U * i1) + 1U] = Yc12_out; - - /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = Xb12_out; - - /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = Yb12_out; - - /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ - pSrc[2U * i3] = Xd12_out; - - /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = Yd12_out; - - /* Twiddle coefficients index modifier */ - ia1 = ia1 + twidCoefModifier; - - /* Updating input index */ - i0 = i0 + 1U; - - } while (--j); - - twidCoefModifier <<= 2U; - - /* Calculation of second stage to excluding last stage */ - for (k = fftLen >> 2U; k > 4U; k >>= 2U) - { - /* Initializations for the first stage */ - n1 = n2; - n2 >>= 2U; - ia1 = 0U; - - /* Calculation of first stage */ - j = 0; - do - { - /* index calculation for the coefficients */ - ia2 = ia1 + ia1; - ia3 = ia2 + ia1; - co1 = pCoef[ia1 * 2U]; - si1 = pCoef[(ia1 * 2U) + 1U]; - co2 = pCoef[ia2 * 2U]; - si2 = pCoef[(ia2 * 2U) + 1U]; - co3 = pCoef[ia3 * 2U]; - si3 = pCoef[(ia3 * 2U) + 1U]; - - /* Twiddle coefficients index modifier */ - ia1 = ia1 + twidCoefModifier; - - i0 = j; - do - { - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - xaIn = pSrc[(2U * i0)]; - yaIn = pSrc[(2U * i0) + 1U]; - - xbIn = pSrc[(2U * i1)]; - ybIn = pSrc[(2U * i1) + 1U]; - - xcIn = pSrc[(2U * i2)]; - ycIn = pSrc[(2U * i2) + 1U]; - - xdIn = pSrc[(2U * i3)]; - ydIn = pSrc[(2U * i3) + 1U]; - - /* xa - xc */ - Xaminusc = xaIn - xcIn; - /* (xb - xd) */ - Xbminusd = xbIn - xdIn; - /* ya - yc */ - Yaminusc = yaIn - ycIn; - /* (yb - yd) */ - Ybminusd = ybIn - ydIn; - - /* xa + xc */ - Xaplusc = xaIn + xcIn; - /* xb + xd */ - Xbplusd = xbIn + xdIn; - /* ya + yc */ - Yaplusc = yaIn + ycIn; - /* yb + yd */ - Ybplusd = ybIn + ydIn; - - /* (xa - xc) - (yb - yd) */ - Xb12C_out = (Xaminusc - Ybminusd); - /* (ya - yc) + (xb - xd) */ - Yb12C_out = (Yaminusc + Xbminusd); - /* xa + xc -(xb + xd) */ - Xc12C_out = (Xaplusc - Xbplusd); - /* (ya + yc) - (yb + yd) */ - Yc12C_out = (Yaplusc - Ybplusd); - /* (xa - xc) + (yb - yd) */ - Xd12C_out = (Xaminusc + Ybminusd); - /* (ya - yc) - (xb - xd) */ - Yd12C_out = (Yaminusc - Xbminusd); - - pSrc[(2U * i0)] = Xaplusc + Xbplusd; - pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd; - - Xb12_out = Xb12C_out * co1; - Yb12_out = Yb12C_out * co1; - Xc12_out = Xc12C_out * co2; - Yc12_out = Yc12C_out * co2; - Xd12_out = Xd12C_out * co3; - Yd12_out = Yd12C_out * co3; - - /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ - //Xb12_out -= Yb12C_out * si1; - p0 = Yb12C_out * si1; - /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ - //Yb12_out += Xb12C_out * si1; - p1 = Xb12C_out * si1; - /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ - //Xc12_out -= Yc12C_out * si2; - p2 = Yc12C_out * si2; - /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ - //Yc12_out += Xc12C_out * si2; - p3 = Xc12C_out * si2; - /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ - //Xd12_out -= Yd12C_out * si3; - p4 = Yd12C_out * si3; - /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ - //Yd12_out += Xd12C_out * si3; - p5 = Xd12C_out * si3; - - Xb12_out -= p0; - Yb12_out += p1; - Xc12_out -= p2; - Yc12_out += p3; - Xd12_out -= p4; - Yd12_out += p5; - - /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = Xc12_out; - - /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ - pSrc[(2U * i1) + 1U] = Yc12_out; - - /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = Xb12_out; - - /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = Yb12_out; - - /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ - pSrc[2U * i3] = Xd12_out; - - /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = Yd12_out; - - i0 += n1; - } while (i0 < fftLen); - j++; - } while (j <= (n2 - 1U)); - twidCoefModifier <<= 2U; - } - /* Initializations of last stage */ - - j = fftLen >> 2; - ptr1 = &pSrc[0]; - - /* Calculations of last stage */ - do - { - xaIn = ptr1[0]; - yaIn = ptr1[1]; - xbIn = ptr1[2]; - ybIn = ptr1[3]; - xcIn = ptr1[4]; - ycIn = ptr1[5]; - xdIn = ptr1[6]; - ydIn = ptr1[7]; - - /* Butterfly implementation */ - /* xa + xc */ - Xaplusc = xaIn + xcIn; - - /* xa - xc */ - Xaminusc = xaIn - xcIn; - - /* ya + yc */ - Yaplusc = yaIn + ycIn; - - /* ya - yc */ - Yaminusc = yaIn - ycIn; - - /* xb + xd */ - Xbplusd = xbIn + xdIn; - - /* yb + yd */ - Ybplusd = ybIn + ydIn; - - /* (xb-xd) */ - Xbminusd = xbIn - xdIn; - - /* (yb-yd) */ - Ybminusd = ybIn - ydIn; - - /* xa' = (xa+xb+xc+xd) * onebyfftLen */ - a0 = (Xaplusc + Xbplusd); - /* ya' = (ya+yb+yc+yd) * onebyfftLen */ - a1 = (Yaplusc + Ybplusd); - /* xc' = (xa-xb+xc-xd) * onebyfftLen */ - a2 = (Xaplusc - Xbplusd); - /* yc' = (ya-yb+yc-yd) * onebyfftLen */ - a3 = (Yaplusc - Ybplusd); - /* xb' = (xa-yb-xc+yd) * onebyfftLen */ - a4 = (Xaminusc - Ybminusd); - /* yb' = (ya+xb-yc-xd) * onebyfftLen */ - a5 = (Yaminusc + Xbminusd); - /* xd' = (xa-yb-xc+yd) * onebyfftLen */ - a6 = (Xaminusc + Ybminusd); - /* yd' = (ya-xb-yc+xd) * onebyfftLen */ - a7 = (Yaminusc - Xbminusd); - - p0 = a0 * onebyfftLen; - p1 = a1 * onebyfftLen; - p2 = a2 * onebyfftLen; - p3 = a3 * onebyfftLen; - p4 = a4 * onebyfftLen; - p5 = a5 * onebyfftLen; - p6 = a6 * onebyfftLen; - p7 = a7 * onebyfftLen; - - /* xa' = (xa+xb+xc+xd) * onebyfftLen */ - ptr1[0] = p0; - /* ya' = (ya+yb+yc+yd) * onebyfftLen */ - ptr1[1] = p1; - /* xc' = (xa-xb+xc-xd) * onebyfftLen */ - ptr1[2] = p2; - /* yc' = (ya-yb+yc-yd) * onebyfftLen */ - ptr1[3] = p3; - /* xb' = (xa-yb-xc+yd) * onebyfftLen */ - ptr1[4] = p4; - /* yb' = (ya+xb-yc-xd) * onebyfftLen */ - ptr1[5] = p5; - /* xd' = (xa-yb-xc+yd) * onebyfftLen */ - ptr1[6] = p6; - /* yd' = (ya-xb-yc+xd) * onebyfftLen */ - ptr1[7] = p7; - - /* increment source pointer by 8 for next calculations */ - ptr1 = ptr1 + 8U; - - } while (--j); - -#else - - float32_t t1, t2, r1, r2, s1, s2; - - /* Initializations for the first stage */ - n2 = fftLen; - n1 = n2; - - /* Calculation of first stage */ - for (k = fftLen; k > 4U; k >>= 2U) - { - /* Initializations for the first stage */ - n1 = n2; - n2 >>= 2U; - ia1 = 0U; - - /* Calculation of first stage */ - j = 0; - do - { - /* index calculation for the coefficients */ - ia2 = ia1 + ia1; - ia3 = ia2 + ia1; - co1 = pCoef[ia1 * 2U]; - si1 = pCoef[(ia1 * 2U) + 1U]; - co2 = pCoef[ia2 * 2U]; - si2 = pCoef[(ia2 * 2U) + 1U]; - co3 = pCoef[ia3 * 2U]; - si3 = pCoef[(ia3 * 2U) + 1U]; - - /* Twiddle coefficients index modifier */ - ia1 = ia1 + twidCoefModifier; - - i0 = j; - do - { - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* xa + xc */ - r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)]; - - /* xa - xc */ - r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)]; - - /* ya + yc */ - s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U]; - - /* ya - yc */ - s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U]; - - /* xb + xd */ - t1 = pSrc[2U * i1] + pSrc[2U * i3]; - - /* xa' = xa + xb + xc + xd */ - pSrc[2U * i0] = r1 + t1; - - /* xa + xc -(xb + xd) */ - r1 = r1 - t1; - - /* yb + yd */ - t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U]; - - /* ya' = ya + yb + yc + yd */ - pSrc[(2U * i0) + 1U] = s1 + t2; - - /* (ya + yc) - (yb + yd) */ - s1 = s1 - t2; - - /* (yb - yd) */ - t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U]; - - /* (xb - xd) */ - t2 = pSrc[2U * i1] - pSrc[2U * i3]; - - /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = (r1 * co2) - (s1 * si2); - - /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ - pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2); - - /* (xa - xc) - (yb - yd) */ - r1 = r2 - t1; - - /* (xa - xc) + (yb - yd) */ - r2 = r2 + t1; - - /* (ya - yc) + (xb - xd) */ - s1 = s2 + t2; - - /* (ya - yc) - (xb - xd) */ - s2 = s2 - t2; - - /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = (r1 * co1) - (s1 * si1); - - /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1); - - /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ - pSrc[2U * i3] = (r2 * co3) - (s2 * si3); - - /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3); - - i0 += n1; - } while ( i0 < fftLen); - j++; - } while (j <= (n2 - 1U)); - twidCoefModifier <<= 2U; - } - /* Initializations of last stage */ - n1 = n2; - n2 >>= 2U; - - /* Calculations of last stage */ - for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1) - { - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Butterfly implementation */ - /* xa + xc */ - r1 = pSrc[2U * i0] + pSrc[2U * i2]; - - /* xa - xc */ - r2 = pSrc[2U * i0] - pSrc[2U * i2]; - - /* ya + yc */ - s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U]; - - /* ya - yc */ - s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U]; - - /* xc + xd */ - t1 = pSrc[2U * i1] + pSrc[2U * i3]; - - /* xa' = xa + xb + xc + xd */ - pSrc[2U * i0] = (r1 + t1) * onebyfftLen; - - /* (xa + xb) - (xc + xd) */ - r1 = r1 - t1; - - /* yb + yd */ - t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U]; - - /* ya' = ya + yb + yc + yd */ - pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen; - - /* (ya + yc) - (yb + yd) */ - s1 = s1 - t2; - - /* (yb-yd) */ - t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U]; - - /* (xb-xd) */ - t2 = pSrc[2U * i1] - pSrc[2U * i3]; - - /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = r1 * onebyfftLen; - - /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ - pSrc[(2U * i1) + 1U] = s1 * onebyfftLen; - - /* (xa - xc) - (yb-yd) */ - r1 = r2 - t1; - - /* (xa - xc) + (yb-yd) */ - r2 = r2 + t1; - - /* (ya - yc) + (xb-xd) */ - s1 = s2 + t2; - - /* (ya - yc) - (xb-xd) */ - s2 = s2 - t2; - - /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = r1 * onebyfftLen; - - /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = s1 * onebyfftLen; - - /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ - pSrc[2U * i3] = r2 * onebyfftLen; - - /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = s2 * onebyfftLen; - } - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ -} - - +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix4_f32.c
+ * Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+extern void arm_bitreversal_f32(
+ float32_t * pSrc,
+ uint16_t fftSize,
+ uint16_t bitRevFactor,
+ const uint16_t * pBitRevTab);
+
+void arm_radix4_butterfly_f32(
+ float32_t * pSrc,
+ uint16_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier);
+
+void arm_radix4_butterfly_inverse_f32(
+ float32_t * pSrc,
+ uint16_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier,
+ float32_t onebyfftLen);
+
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
+ @param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
+ @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+ @return none
+ */
+
+void arm_cfft_radix4_f32(
+ const arm_cfft_radix4_instance_f32 * S,
+ float32_t * pSrc)
+{
+ if (S->ifftFlag == 1U)
+ {
+ /* Complex IFFT radix-4 */
+ arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
+ }
+ else
+ {
+ /* Complex FFT radix-4 */
+ arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
+ }
+
+ if (S->bitReverseFlag == 1U)
+ {
+ /* Bit Reversal */
+ arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+ }
+
+}
+
+/**
+ @} end of ComplexFFT group
+ */
+
+/* ----------------------------------------------------------------------
+ * Internal helper function used by the FFTs
+ * ---------------------------------------------------------------------- */
+
+/**
+ brief Core function for the floating-point CFFT butterfly process.
+ param[in,out] pSrc points to the in-place buffer of floating-point data type
+ param[in] fftLen length of the FFT
+ param[in] pCoef points to the twiddle coefficient buffer
+ param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+ return none
+ */
+
+void arm_radix4_butterfly_f32(
+ float32_t * pSrc,
+ uint16_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier)
+{
+ float32_t co1, co2, co3, si1, si2, si3;
+ uint32_t ia1, ia2, ia3;
+ uint32_t i0, i1, i2, i3;
+ uint32_t n1, n2, j, k;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
+ float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
+ Ybminusd;
+ float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
+ float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
+ float32_t *ptr1;
+ float32_t p0,p1,p2,p3,p4,p5;
+ float32_t a0,a1,a2,a3,a4,a5,a6,a7;
+
+ /* Initializations for the first stage */
+ n2 = fftLen;
+ n1 = n2;
+
+ /* n2 = fftLen/4 */
+ n2 >>= 2U;
+ i0 = 0U;
+ ia1 = 0U;
+
+ j = n2;
+
+ /* Calculation of first stage */
+ do
+ {
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ xaIn = pSrc[(2U * i0)];
+ yaIn = pSrc[(2U * i0) + 1U];
+
+ xbIn = pSrc[(2U * i1)];
+ ybIn = pSrc[(2U * i1) + 1U];
+
+ xcIn = pSrc[(2U * i2)];
+ ycIn = pSrc[(2U * i2) + 1U];
+
+ xdIn = pSrc[(2U * i3)];
+ ydIn = pSrc[(2U * i3) + 1U];
+
+ /* xa + xc */
+ Xaplusc = xaIn + xcIn;
+ /* xb + xd */
+ Xbplusd = xbIn + xdIn;
+ /* ya + yc */
+ Yaplusc = yaIn + ycIn;
+ /* yb + yd */
+ Ybplusd = ybIn + ydIn;
+
+ /* index calculation for the coefficients */
+ ia2 = ia1 + ia1;
+ co2 = pCoef[ia2 * 2U];
+ si2 = pCoef[(ia2 * 2U) + 1U];
+
+ /* xa - xc */
+ Xaminusc = xaIn - xcIn;
+ /* xb - xd */
+ Xbminusd = xbIn - xdIn;
+ /* ya - yc */
+ Yaminusc = yaIn - ycIn;
+ /* yb - yd */
+ Ybminusd = ybIn - ydIn;
+
+ /* xa' = xa + xb + xc + xd */
+ pSrc[(2U * i0)] = Xaplusc + Xbplusd;
+ /* ya' = ya + yb + yc + yd */
+ pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+
+ /* (xa - xc) + (yb - yd) */
+ Xb12C_out = (Xaminusc + Ybminusd);
+ /* (ya - yc) + (xb - xd) */
+ Yb12C_out = (Yaminusc - Xbminusd);
+ /* (xa + xc) - (xb + xd) */
+ Xc12C_out = (Xaplusc - Xbplusd);
+ /* (ya + yc) - (yb + yd) */
+ Yc12C_out = (Yaplusc - Ybplusd);
+ /* (xa - xc) - (yb - yd) */
+ Xd12C_out = (Xaminusc - Ybminusd);
+ /* (ya - yc) + (xb - xd) */
+ Yd12C_out = (Xbminusd + Yaminusc);
+
+ co1 = pCoef[ia1 * 2U];
+ si1 = pCoef[(ia1 * 2U) + 1U];
+
+ /* index calculation for the coefficients */
+ ia3 = ia2 + ia1;
+ co3 = pCoef[ia3 * 2U];
+ si3 = pCoef[(ia3 * 2U) + 1U];
+
+ Xb12_out = Xb12C_out * co1;
+ Yb12_out = Yb12C_out * co1;
+ Xc12_out = Xc12C_out * co2;
+ Yc12_out = Yc12C_out * co2;
+ Xd12_out = Xd12C_out * co3;
+ Yd12_out = Yd12C_out * co3;
+
+ /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+ //Xb12_out -= Yb12C_out * si1;
+ p0 = Yb12C_out * si1;
+ /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+ //Yb12_out += Xb12C_out * si1;
+ p1 = Xb12C_out * si1;
+ /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+ //Xc12_out -= Yc12C_out * si2;
+ p2 = Yc12C_out * si2;
+ /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+ //Yc12_out += Xc12C_out * si2;
+ p3 = Xc12C_out * si2;
+ /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+ //Xd12_out -= Yd12C_out * si3;
+ p4 = Yd12C_out * si3;
+ /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+ //Yd12_out += Xd12C_out * si3;
+ p5 = Xd12C_out * si3;
+
+ Xb12_out += p0;
+ Yb12_out -= p1;
+ Xc12_out += p2;
+ Yc12_out -= p3;
+ Xd12_out += p4;
+ Yd12_out -= p5;
+
+ /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = Xc12_out;
+
+ /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
+ pSrc[(2U * i1) + 1U] = Yc12_out;
+
+ /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = Xb12_out;
+
+ /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = Yb12_out;
+
+ /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
+ pSrc[2U * i3] = Xd12_out;
+
+ /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = Yd12_out;
+
+ /* Twiddle coefficients index modifier */
+ ia1 += twidCoefModifier;
+
+ /* Updating input index */
+ i0++;
+
+ }
+ while (--j);
+
+ twidCoefModifier <<= 2U;
+
+ /* Calculation of second stage to excluding last stage */
+ for (k = fftLen >> 2U; k > 4U; k >>= 2U)
+ {
+ /* Initializations for the first stage */
+ n1 = n2;
+ n2 >>= 2U;
+ ia1 = 0U;
+
+ /* Calculation of first stage */
+ j = 0;
+ do
+ {
+ /* index calculation for the coefficients */
+ ia2 = ia1 + ia1;
+ ia3 = ia2 + ia1;
+ co1 = pCoef[(ia1 * 2U)];
+ si1 = pCoef[(ia1 * 2U) + 1U];
+ co2 = pCoef[(ia2 * 2U)];
+ si2 = pCoef[(ia2 * 2U) + 1U];
+ co3 = pCoef[(ia3 * 2U)];
+ si3 = pCoef[(ia3 * 2U) + 1U];
+
+ /* Twiddle coefficients index modifier */
+ ia1 += twidCoefModifier;
+
+ i0 = j;
+ do
+ {
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ xaIn = pSrc[(2U * i0)];
+ yaIn = pSrc[(2U * i0) + 1U];
+
+ xbIn = pSrc[(2U * i1)];
+ ybIn = pSrc[(2U * i1) + 1U];
+
+ xcIn = pSrc[(2U * i2)];
+ ycIn = pSrc[(2U * i2) + 1U];
+
+ xdIn = pSrc[(2U * i3)];
+ ydIn = pSrc[(2U * i3) + 1U];
+
+ /* xa - xc */
+ Xaminusc = xaIn - xcIn;
+ /* (xb - xd) */
+ Xbminusd = xbIn - xdIn;
+ /* ya - yc */
+ Yaminusc = yaIn - ycIn;
+ /* (yb - yd) */
+ Ybminusd = ybIn - ydIn;
+
+ /* xa + xc */
+ Xaplusc = xaIn + xcIn;
+ /* xb + xd */
+ Xbplusd = xbIn + xdIn;
+ /* ya + yc */
+ Yaplusc = yaIn + ycIn;
+ /* yb + yd */
+ Ybplusd = ybIn + ydIn;
+
+ /* (xa - xc) + (yb - yd) */
+ Xb12C_out = (Xaminusc + Ybminusd);
+ /* (ya - yc) - (xb - xd) */
+ Yb12C_out = (Yaminusc - Xbminusd);
+ /* xa + xc -(xb + xd) */
+ Xc12C_out = (Xaplusc - Xbplusd);
+ /* (ya + yc) - (yb + yd) */
+ Yc12C_out = (Yaplusc - Ybplusd);
+ /* (xa - xc) - (yb - yd) */
+ Xd12C_out = (Xaminusc - Ybminusd);
+ /* (ya - yc) + (xb - xd) */
+ Yd12C_out = (Xbminusd + Yaminusc);
+
+ pSrc[(2U * i0)] = Xaplusc + Xbplusd;
+ pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+
+ Xb12_out = Xb12C_out * co1;
+ Yb12_out = Yb12C_out * co1;
+ Xc12_out = Xc12C_out * co2;
+ Yc12_out = Yc12C_out * co2;
+ Xd12_out = Xd12C_out * co3;
+ Yd12_out = Yd12C_out * co3;
+
+ /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+ //Xb12_out -= Yb12C_out * si1;
+ p0 = Yb12C_out * si1;
+ /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+ //Yb12_out += Xb12C_out * si1;
+ p1 = Xb12C_out * si1;
+ /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+ //Xc12_out -= Yc12C_out * si2;
+ p2 = Yc12C_out * si2;
+ /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+ //Yc12_out += Xc12C_out * si2;
+ p3 = Xc12C_out * si2;
+ /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+ //Xd12_out -= Yd12C_out * si3;
+ p4 = Yd12C_out * si3;
+ /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+ //Yd12_out += Xd12C_out * si3;
+ p5 = Xd12C_out * si3;
+
+ Xb12_out += p0;
+ Yb12_out -= p1;
+ Xc12_out += p2;
+ Yc12_out -= p3;
+ Xd12_out += p4;
+ Yd12_out -= p5;
+
+ /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = Xc12_out;
+
+ /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
+ pSrc[(2U * i1) + 1U] = Yc12_out;
+
+ /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = Xb12_out;
+
+ /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = Yb12_out;
+
+ /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
+ pSrc[2U * i3] = Xd12_out;
+
+ /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = Yd12_out;
+
+ i0 += n1;
+ } while (i0 < fftLen);
+ j++;
+ } while (j <= (n2 - 1U));
+ twidCoefModifier <<= 2U;
+ }
+
+ j = fftLen >> 2;
+ ptr1 = &pSrc[0];
+
+ /* Calculations of last stage */
+ do
+ {
+ xaIn = ptr1[0];
+ yaIn = ptr1[1];
+ xbIn = ptr1[2];
+ ybIn = ptr1[3];
+ xcIn = ptr1[4];
+ ycIn = ptr1[5];
+ xdIn = ptr1[6];
+ ydIn = ptr1[7];
+
+ /* xa + xc */
+ Xaplusc = xaIn + xcIn;
+
+ /* xa - xc */
+ Xaminusc = xaIn - xcIn;
+
+ /* ya + yc */
+ Yaplusc = yaIn + ycIn;
+
+ /* ya - yc */
+ Yaminusc = yaIn - ycIn;
+
+ /* xb + xd */
+ Xbplusd = xbIn + xdIn;
+
+ /* yb + yd */
+ Ybplusd = ybIn + ydIn;
+
+ /* (xb-xd) */
+ Xbminusd = xbIn - xdIn;
+
+ /* (yb-yd) */
+ Ybminusd = ybIn - ydIn;
+
+ /* xa' = xa + xb + xc + xd */
+ a0 = (Xaplusc + Xbplusd);
+ /* ya' = ya + yb + yc + yd */
+ a1 = (Yaplusc + Ybplusd);
+ /* xc' = (xa-xb+xc-xd) */
+ a2 = (Xaplusc - Xbplusd);
+ /* yc' = (ya-yb+yc-yd) */
+ a3 = (Yaplusc - Ybplusd);
+ /* xb' = (xa+yb-xc-yd) */
+ a4 = (Xaminusc + Ybminusd);
+ /* yb' = (ya-xb-yc+xd) */
+ a5 = (Yaminusc - Xbminusd);
+ /* xd' = (xa-yb-xc+yd)) */
+ a6 = (Xaminusc - Ybminusd);
+ /* yd' = (ya+xb-yc-xd) */
+ a7 = (Xbminusd + Yaminusc);
+
+ ptr1[0] = a0;
+ ptr1[1] = a1;
+ ptr1[2] = a2;
+ ptr1[3] = a3;
+ ptr1[4] = a4;
+ ptr1[5] = a5;
+ ptr1[6] = a6;
+ ptr1[7] = a7;
+
+ /* increment pointer by 8 */
+ ptr1 += 8U;
+ } while (--j);
+
+#else
+
+ float32_t t1, t2, r1, r2, s1, s2;
+
+ /* Initializations for the fft calculation */
+ n2 = fftLen;
+ n1 = n2;
+ for (k = fftLen; k > 1U; k >>= 2U)
+ {
+ /* Initializations for the fft calculation */
+ n1 = n2;
+ n2 >>= 2U;
+ ia1 = 0U;
+
+ /* FFT Calculation */
+ j = 0;
+ do
+ {
+ /* index calculation for the coefficients */
+ ia2 = ia1 + ia1;
+ ia3 = ia2 + ia1;
+ co1 = pCoef[ia1 * 2U];
+ si1 = pCoef[(ia1 * 2U) + 1U];
+ co2 = pCoef[ia2 * 2U];
+ si2 = pCoef[(ia2 * 2U) + 1U];
+ co3 = pCoef[ia3 * 2U];
+ si3 = pCoef[(ia3 * 2U) + 1U];
+
+ /* Twiddle coefficients index modifier */
+ ia1 = ia1 + twidCoefModifier;
+
+ i0 = j;
+ do
+ {
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* xa + xc */
+ r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
+
+ /* xa - xc */
+ r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
+
+ /* ya + yc */
+ s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+
+ /* ya - yc */
+ s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+
+ /* xb + xd */
+ t1 = pSrc[2U * i1] + pSrc[2U * i3];
+
+ /* xa' = xa + xb + xc + xd */
+ pSrc[2U * i0] = r1 + t1;
+
+ /* xa + xc -(xb + xd) */
+ r1 = r1 - t1;
+
+ /* yb + yd */
+ t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+
+ /* ya' = ya + yb + yc + yd */
+ pSrc[(2U * i0) + 1U] = s1 + t2;
+
+ /* (ya + yc) - (yb + yd) */
+ s1 = s1 - t2;
+
+ /* (yb - yd) */
+ t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+
+ /* (xb - xd) */
+ t2 = pSrc[2U * i1] - pSrc[2U * i3];
+
+ /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
+
+ /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
+ pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
+
+ /* (xa - xc) + (yb - yd) */
+ r1 = r2 + t1;
+
+ /* (xa - xc) - (yb - yd) */
+ r2 = r2 - t1;
+
+ /* (ya - yc) - (xb - xd) */
+ s1 = s2 - t2;
+
+ /* (ya - yc) + (xb - xd) */
+ s2 = s2 + t2;
+
+ /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
+
+ /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
+
+ /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
+ pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
+
+ /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
+
+ i0 += n1;
+ } while ( i0 < fftLen);
+ j++;
+ } while (j <= (n2 - 1U));
+ twidCoefModifier <<= 2U;
+ }
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+}
+
+/**
+ brief Core function for the floating-point CIFFT butterfly process.
+ param[in,out] pSrc points to the in-place buffer of floating-point data type
+ param[in] fftLen length of the FFT
+ param[in] pCoef points to twiddle coefficient buffer
+ param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+ param[in] onebyfftLen value of 1/fftLen
+ return none
+ */
+
+void arm_radix4_butterfly_inverse_f32(
+ float32_t * pSrc,
+ uint16_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier,
+ float32_t onebyfftLen)
+{
+ float32_t co1, co2, co3, si1, si2, si3;
+ uint32_t ia1, ia2, ia3;
+ uint32_t i0, i1, i2, i3;
+ uint32_t n1, n2, j, k;
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
+ float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
+ Ybminusd;
+ float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
+ float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
+ float32_t *ptr1;
+ float32_t p0,p1,p2,p3,p4,p5,p6,p7;
+ float32_t a0,a1,a2,a3,a4,a5,a6,a7;
+
+
+ /* Initializations for the first stage */
+ n2 = fftLen;
+ n1 = n2;
+
+ /* n2 = fftLen/4 */
+ n2 >>= 2U;
+ i0 = 0U;
+ ia1 = 0U;
+
+ j = n2;
+
+ /* Calculation of first stage */
+ do
+ {
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Butterfly implementation */
+ xaIn = pSrc[(2U * i0)];
+ yaIn = pSrc[(2U * i0) + 1U];
+
+ xcIn = pSrc[(2U * i2)];
+ ycIn = pSrc[(2U * i2) + 1U];
+
+ xbIn = pSrc[(2U * i1)];
+ ybIn = pSrc[(2U * i1) + 1U];
+
+ xdIn = pSrc[(2U * i3)];
+ ydIn = pSrc[(2U * i3) + 1U];
+
+ /* xa + xc */
+ Xaplusc = xaIn + xcIn;
+ /* xb + xd */
+ Xbplusd = xbIn + xdIn;
+ /* ya + yc */
+ Yaplusc = yaIn + ycIn;
+ /* yb + yd */
+ Ybplusd = ybIn + ydIn;
+
+ /* index calculation for the coefficients */
+ ia2 = ia1 + ia1;
+ co2 = pCoef[ia2 * 2U];
+ si2 = pCoef[(ia2 * 2U) + 1U];
+
+ /* xa - xc */
+ Xaminusc = xaIn - xcIn;
+ /* xb - xd */
+ Xbminusd = xbIn - xdIn;
+ /* ya - yc */
+ Yaminusc = yaIn - ycIn;
+ /* yb - yd */
+ Ybminusd = ybIn - ydIn;
+
+ /* xa' = xa + xb + xc + xd */
+ pSrc[(2U * i0)] = Xaplusc + Xbplusd;
+
+ /* ya' = ya + yb + yc + yd */
+ pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+
+ /* (xa - xc) - (yb - yd) */
+ Xb12C_out = (Xaminusc - Ybminusd);
+ /* (ya - yc) + (xb - xd) */
+ Yb12C_out = (Yaminusc + Xbminusd);
+ /* (xa + xc) - (xb + xd) */
+ Xc12C_out = (Xaplusc - Xbplusd);
+ /* (ya + yc) - (yb + yd) */
+ Yc12C_out = (Yaplusc - Ybplusd);
+ /* (xa - xc) + (yb - yd) */
+ Xd12C_out = (Xaminusc + Ybminusd);
+ /* (ya - yc) - (xb - xd) */
+ Yd12C_out = (Yaminusc - Xbminusd);
+
+ co1 = pCoef[ia1 * 2U];
+ si1 = pCoef[(ia1 * 2U) + 1U];
+
+ /* index calculation for the coefficients */
+ ia3 = ia2 + ia1;
+ co3 = pCoef[ia3 * 2U];
+ si3 = pCoef[(ia3 * 2U) + 1U];
+
+ Xb12_out = Xb12C_out * co1;
+ Yb12_out = Yb12C_out * co1;
+ Xc12_out = Xc12C_out * co2;
+ Yc12_out = Yc12C_out * co2;
+ Xd12_out = Xd12C_out * co3;
+ Yd12_out = Yd12C_out * co3;
+
+ /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+ //Xb12_out -= Yb12C_out * si1;
+ p0 = Yb12C_out * si1;
+ /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+ //Yb12_out += Xb12C_out * si1;
+ p1 = Xb12C_out * si1;
+ /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+ //Xc12_out -= Yc12C_out * si2;
+ p2 = Yc12C_out * si2;
+ /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+ //Yc12_out += Xc12C_out * si2;
+ p3 = Xc12C_out * si2;
+ /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+ //Xd12_out -= Yd12C_out * si3;
+ p4 = Yd12C_out * si3;
+ /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+ //Yd12_out += Xd12C_out * si3;
+ p5 = Xd12C_out * si3;
+
+ Xb12_out -= p0;
+ Yb12_out += p1;
+ Xc12_out -= p2;
+ Yc12_out += p3;
+ Xd12_out -= p4;
+ Yd12_out += p5;
+
+ /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = Xc12_out;
+
+ /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+ pSrc[(2U * i1) + 1U] = Yc12_out;
+
+ /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = Xb12_out;
+
+ /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = Yb12_out;
+
+ /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+ pSrc[2U * i3] = Xd12_out;
+
+ /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = Yd12_out;
+
+ /* Twiddle coefficients index modifier */
+ ia1 = ia1 + twidCoefModifier;
+
+ /* Updating input index */
+ i0 = i0 + 1U;
+
+ } while (--j);
+
+ twidCoefModifier <<= 2U;
+
+ /* Calculation of second stage to excluding last stage */
+ for (k = fftLen >> 2U; k > 4U; k >>= 2U)
+ {
+ /* Initializations for the first stage */
+ n1 = n2;
+ n2 >>= 2U;
+ ia1 = 0U;
+
+ /* Calculation of first stage */
+ j = 0;
+ do
+ {
+ /* index calculation for the coefficients */
+ ia2 = ia1 + ia1;
+ ia3 = ia2 + ia1;
+ co1 = pCoef[ia1 * 2U];
+ si1 = pCoef[(ia1 * 2U) + 1U];
+ co2 = pCoef[ia2 * 2U];
+ si2 = pCoef[(ia2 * 2U) + 1U];
+ co3 = pCoef[ia3 * 2U];
+ si3 = pCoef[(ia3 * 2U) + 1U];
+
+ /* Twiddle coefficients index modifier */
+ ia1 = ia1 + twidCoefModifier;
+
+ i0 = j;
+ do
+ {
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ xaIn = pSrc[(2U * i0)];
+ yaIn = pSrc[(2U * i0) + 1U];
+
+ xbIn = pSrc[(2U * i1)];
+ ybIn = pSrc[(2U * i1) + 1U];
+
+ xcIn = pSrc[(2U * i2)];
+ ycIn = pSrc[(2U * i2) + 1U];
+
+ xdIn = pSrc[(2U * i3)];
+ ydIn = pSrc[(2U * i3) + 1U];
+
+ /* xa - xc */
+ Xaminusc = xaIn - xcIn;
+ /* (xb - xd) */
+ Xbminusd = xbIn - xdIn;
+ /* ya - yc */
+ Yaminusc = yaIn - ycIn;
+ /* (yb - yd) */
+ Ybminusd = ybIn - ydIn;
+
+ /* xa + xc */
+ Xaplusc = xaIn + xcIn;
+ /* xb + xd */
+ Xbplusd = xbIn + xdIn;
+ /* ya + yc */
+ Yaplusc = yaIn + ycIn;
+ /* yb + yd */
+ Ybplusd = ybIn + ydIn;
+
+ /* (xa - xc) - (yb - yd) */
+ Xb12C_out = (Xaminusc - Ybminusd);
+ /* (ya - yc) + (xb - xd) */
+ Yb12C_out = (Yaminusc + Xbminusd);
+ /* xa + xc -(xb + xd) */
+ Xc12C_out = (Xaplusc - Xbplusd);
+ /* (ya + yc) - (yb + yd) */
+ Yc12C_out = (Yaplusc - Ybplusd);
+ /* (xa - xc) + (yb - yd) */
+ Xd12C_out = (Xaminusc + Ybminusd);
+ /* (ya - yc) - (xb - xd) */
+ Yd12C_out = (Yaminusc - Xbminusd);
+
+ pSrc[(2U * i0)] = Xaplusc + Xbplusd;
+ pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
+
+ Xb12_out = Xb12C_out * co1;
+ Yb12_out = Yb12C_out * co1;
+ Xc12_out = Xc12C_out * co2;
+ Yc12_out = Yc12C_out * co2;
+ Xd12_out = Xd12C_out * co3;
+ Yd12_out = Yd12C_out * co3;
+
+ /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+ //Xb12_out -= Yb12C_out * si1;
+ p0 = Yb12C_out * si1;
+ /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+ //Yb12_out += Xb12C_out * si1;
+ p1 = Xb12C_out * si1;
+ /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+ //Xc12_out -= Yc12C_out * si2;
+ p2 = Yc12C_out * si2;
+ /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+ //Yc12_out += Xc12C_out * si2;
+ p3 = Xc12C_out * si2;
+ /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+ //Xd12_out -= Yd12C_out * si3;
+ p4 = Yd12C_out * si3;
+ /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+ //Yd12_out += Xd12C_out * si3;
+ p5 = Xd12C_out * si3;
+
+ Xb12_out -= p0;
+ Yb12_out += p1;
+ Xc12_out -= p2;
+ Yc12_out += p3;
+ Xd12_out -= p4;
+ Yd12_out += p5;
+
+ /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = Xc12_out;
+
+ /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+ pSrc[(2U * i1) + 1U] = Yc12_out;
+
+ /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = Xb12_out;
+
+ /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = Yb12_out;
+
+ /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+ pSrc[2U * i3] = Xd12_out;
+
+ /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = Yd12_out;
+
+ i0 += n1;
+ } while (i0 < fftLen);
+ j++;
+ } while (j <= (n2 - 1U));
+ twidCoefModifier <<= 2U;
+ }
+ /* Initializations of last stage */
+
+ j = fftLen >> 2;
+ ptr1 = &pSrc[0];
+
+ /* Calculations of last stage */
+ do
+ {
+ xaIn = ptr1[0];
+ yaIn = ptr1[1];
+ xbIn = ptr1[2];
+ ybIn = ptr1[3];
+ xcIn = ptr1[4];
+ ycIn = ptr1[5];
+ xdIn = ptr1[6];
+ ydIn = ptr1[7];
+
+ /* Butterfly implementation */
+ /* xa + xc */
+ Xaplusc = xaIn + xcIn;
+
+ /* xa - xc */
+ Xaminusc = xaIn - xcIn;
+
+ /* ya + yc */
+ Yaplusc = yaIn + ycIn;
+
+ /* ya - yc */
+ Yaminusc = yaIn - ycIn;
+
+ /* xb + xd */
+ Xbplusd = xbIn + xdIn;
+
+ /* yb + yd */
+ Ybplusd = ybIn + ydIn;
+
+ /* (xb-xd) */
+ Xbminusd = xbIn - xdIn;
+
+ /* (yb-yd) */
+ Ybminusd = ybIn - ydIn;
+
+ /* xa' = (xa+xb+xc+xd) * onebyfftLen */
+ a0 = (Xaplusc + Xbplusd);
+ /* ya' = (ya+yb+yc+yd) * onebyfftLen */
+ a1 = (Yaplusc + Ybplusd);
+ /* xc' = (xa-xb+xc-xd) * onebyfftLen */
+ a2 = (Xaplusc - Xbplusd);
+ /* yc' = (ya-yb+yc-yd) * onebyfftLen */
+ a3 = (Yaplusc - Ybplusd);
+ /* xb' = (xa-yb-xc+yd) * onebyfftLen */
+ a4 = (Xaminusc - Ybminusd);
+ /* yb' = (ya+xb-yc-xd) * onebyfftLen */
+ a5 = (Yaminusc + Xbminusd);
+ /* xd' = (xa-yb-xc+yd) * onebyfftLen */
+ a6 = (Xaminusc + Ybminusd);
+ /* yd' = (ya-xb-yc+xd) * onebyfftLen */
+ a7 = (Yaminusc - Xbminusd);
+
+ p0 = a0 * onebyfftLen;
+ p1 = a1 * onebyfftLen;
+ p2 = a2 * onebyfftLen;
+ p3 = a3 * onebyfftLen;
+ p4 = a4 * onebyfftLen;
+ p5 = a5 * onebyfftLen;
+ p6 = a6 * onebyfftLen;
+ p7 = a7 * onebyfftLen;
+
+ /* xa' = (xa+xb+xc+xd) * onebyfftLen */
+ ptr1[0] = p0;
+ /* ya' = (ya+yb+yc+yd) * onebyfftLen */
+ ptr1[1] = p1;
+ /* xc' = (xa-xb+xc-xd) * onebyfftLen */
+ ptr1[2] = p2;
+ /* yc' = (ya-yb+yc-yd) * onebyfftLen */
+ ptr1[3] = p3;
+ /* xb' = (xa-yb-xc+yd) * onebyfftLen */
+ ptr1[4] = p4;
+ /* yb' = (ya+xb-yc-xd) * onebyfftLen */
+ ptr1[5] = p5;
+ /* xd' = (xa-yb-xc+yd) * onebyfftLen */
+ ptr1[6] = p6;
+ /* yd' = (ya-xb-yc+xd) * onebyfftLen */
+ ptr1[7] = p7;
+
+ /* increment source pointer by 8 for next calculations */
+ ptr1 = ptr1 + 8U;
+
+ } while (--j);
+
+#else
+
+ float32_t t1, t2, r1, r2, s1, s2;
+
+ /* Initializations for the first stage */
+ n2 = fftLen;
+ n1 = n2;
+
+ /* Calculation of first stage */
+ for (k = fftLen; k > 4U; k >>= 2U)
+ {
+ /* Initializations for the first stage */
+ n1 = n2;
+ n2 >>= 2U;
+ ia1 = 0U;
+
+ /* Calculation of first stage */
+ j = 0;
+ do
+ {
+ /* index calculation for the coefficients */
+ ia2 = ia1 + ia1;
+ ia3 = ia2 + ia1;
+ co1 = pCoef[ia1 * 2U];
+ si1 = pCoef[(ia1 * 2U) + 1U];
+ co2 = pCoef[ia2 * 2U];
+ si2 = pCoef[(ia2 * 2U) + 1U];
+ co3 = pCoef[ia3 * 2U];
+ si3 = pCoef[(ia3 * 2U) + 1U];
+
+ /* Twiddle coefficients index modifier */
+ ia1 = ia1 + twidCoefModifier;
+
+ i0 = j;
+ do
+ {
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* xa + xc */
+ r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
+
+ /* xa - xc */
+ r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
+
+ /* ya + yc */
+ s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+
+ /* ya - yc */
+ s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+
+ /* xb + xd */
+ t1 = pSrc[2U * i1] + pSrc[2U * i3];
+
+ /* xa' = xa + xb + xc + xd */
+ pSrc[2U * i0] = r1 + t1;
+
+ /* xa + xc -(xb + xd) */
+ r1 = r1 - t1;
+
+ /* yb + yd */
+ t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+
+ /* ya' = ya + yb + yc + yd */
+ pSrc[(2U * i0) + 1U] = s1 + t2;
+
+ /* (ya + yc) - (yb + yd) */
+ s1 = s1 - t2;
+
+ /* (yb - yd) */
+ t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+
+ /* (xb - xd) */
+ t2 = pSrc[2U * i1] - pSrc[2U * i3];
+
+ /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
+
+ /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+ pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
+
+ /* (xa - xc) - (yb - yd) */
+ r1 = r2 - t1;
+
+ /* (xa - xc) + (yb - yd) */
+ r2 = r2 + t1;
+
+ /* (ya - yc) + (xb - xd) */
+ s1 = s2 + t2;
+
+ /* (ya - yc) - (xb - xd) */
+ s2 = s2 - t2;
+
+ /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
+
+ /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
+
+ /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+ pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
+
+ /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
+
+ i0 += n1;
+ } while ( i0 < fftLen);
+ j++;
+ } while (j <= (n2 - 1U));
+ twidCoefModifier <<= 2U;
+ }
+ /* Initializations of last stage */
+ n1 = n2;
+ n2 >>= 2U;
+
+ /* Calculations of last stage */
+ for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
+ {
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Butterfly implementation */
+ /* xa + xc */
+ r1 = pSrc[2U * i0] + pSrc[2U * i2];
+
+ /* xa - xc */
+ r2 = pSrc[2U * i0] - pSrc[2U * i2];
+
+ /* ya + yc */
+ s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+
+ /* ya - yc */
+ s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+
+ /* xc + xd */
+ t1 = pSrc[2U * i1] + pSrc[2U * i3];
+
+ /* xa' = xa + xb + xc + xd */
+ pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
+
+ /* (xa + xb) - (xc + xd) */
+ r1 = r1 - t1;
+
+ /* yb + yd */
+ t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+
+ /* ya' = ya + yb + yc + yd */
+ pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
+
+ /* (ya + yc) - (yb + yd) */
+ s1 = s1 - t2;
+
+ /* (yb-yd) */
+ t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+
+ /* (xb-xd) */
+ t2 = pSrc[2U * i1] - pSrc[2U * i3];
+
+ /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = r1 * onebyfftLen;
+
+ /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+ pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
+
+ /* (xa - xc) - (yb-yd) */
+ r1 = r2 - t1;
+
+ /* (xa - xc) + (yb-yd) */
+ r2 = r2 + t1;
+
+ /* (ya - yc) + (xb-xd) */
+ s1 = s2 + t2;
+
+ /* (ya - yc) - (xb-xd) */
+ s2 = s2 - t2;
+
+ /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = r1 * onebyfftLen;
+
+ /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
+
+ /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+ pSrc[2U * i3] = r2 * onebyfftLen;
+
+ /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
+ }
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+}
+
+
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f32.c index b3aabbb..539206d 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f32.c @@ -1,168 +1,156 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix4_init_f32.c - * Description: Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Initialization function for the floating-point CFFT/CIFFT. - @deprecated Do not use this function. It has been superceded by \ref arm_cfft_f32 and will be removed in the future. - @param[in,out] S points to an instance of the floating-point CFFT/CIFFT structure - @param[in] fftLen length of the FFT - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length - - @par Details - The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed. - Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated - @par - The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order. - Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order. - @par - The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024. - @par - This Function also initializes Twiddle factor table pointer and Bit reversal table pointer. - */ - -arm_status arm_cfft_radix4_init_f32( - arm_cfft_radix4_instance_f32 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - /* Initialise the default arm status */ - arm_status status = ARM_MATH_ARGUMENT_ERROR; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096) - - /* Initialise the default arm status */ - status = ARM_MATH_SUCCESS; - - /* Initialise the FFT length */ - S->fftLen = fftLen; - - /* Initialise the Twiddle coefficient pointer */ - S->pTwiddle = (float32_t *) twiddleCoef; - - /* Initialise the Flag for selection of CFFT or CIFFT */ - S->ifftFlag = ifftFlag; - - /* Initialise the Flag for calculation Bit reversal or not */ - S->bitReverseFlag = bitReverseFlag; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096) - - /* Initializations of structure parameters depending on the FFT length */ - switch (S->fftLen) - { - - case 4096U: - /* Initializations of structure parameters for 4096 point FFT */ - - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 1U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 1U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) armBitRevTable; - /* Initialise the 1/fftLen Value */ - S->onebyfftLen = 0.000244140625; - break; - - case 1024U: - /* Initializations of structure parameters for 1024 point FFT */ - - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 4U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 4U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) & armBitRevTable[3]; - /* Initialise the 1/fftLen Value */ - S->onebyfftLen = 0.0009765625f; - break; - - - case 256U: - /* Initializations of structure parameters for 256 point FFT */ - S->twidCoefModifier = 16U; - S->bitRevFactor = 16U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[15]; - S->onebyfftLen = 0.00390625f; - break; - - case 64U: - /* Initializations of structure parameters for 64 point FFT */ - S->twidCoefModifier = 64U; - S->bitRevFactor = 64U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[63]; - S->onebyfftLen = 0.015625f; - break; - - case 16U: - /* Initializations of structure parameters for 16 point FFT */ - S->twidCoefModifier = 256U; - S->bitRevFactor = 256U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[255]; - S->onebyfftLen = 0.0625f; - break; - - - default: - /* Reporting argument error if fftSize is not valid value */ - status = ARM_MATH_ARGUMENT_ERROR; - break; - } -#endif -#endif -#endif - - return (status); -} - -/** - @} end of ComplexFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix4_init_f32.c
+ * Description: Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Initialization function for the floating-point CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superceded by \ref arm_cfft_f32 and will be removed in the future.
+ @param[in,out] S points to an instance of the floating-point CFFT/CIFFT structure
+ @param[in] fftLen length of the FFT
+ @param[in] ifftFlag flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+ @par Details
+ The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+ Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
+ @par
+ The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+ Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+ @par
+ The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
+ @par
+ This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+ */
+
+arm_status arm_cfft_radix4_init_f32(
+ arm_cfft_radix4_instance_f32 * S,
+ uint16_t fftLen,
+ uint8_t ifftFlag,
+ uint8_t bitReverseFlag)
+{
+ /* Initialise the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+
+ /* Initialise the FFT length */
+ S->fftLen = fftLen;
+
+ /* Initialise the Twiddle coefficient pointer */
+ S->pTwiddle = (float32_t *) twiddleCoef;
+
+ /* Initialise the Flag for selection of CFFT or CIFFT */
+ S->ifftFlag = ifftFlag;
+
+ /* Initialise the Flag for calculation Bit reversal or not */
+ S->bitReverseFlag = bitReverseFlag;
+
+ /* Initializations of structure parameters depending on the FFT length */
+ switch (S->fftLen)
+ {
+
+ case 4096U:
+ /* Initializations of structure parameters for 4096 point FFT */
+
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 1U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 1U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) armBitRevTable;
+ /* Initialise the 1/fftLen Value */
+ S->onebyfftLen = 0.000244140625;
+ break;
+
+ case 1024U:
+ /* Initializations of structure parameters for 1024 point FFT */
+
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 4U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 4U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+ /* Initialise the 1/fftLen Value */
+ S->onebyfftLen = 0.0009765625f;
+ break;
+
+
+ case 256U:
+ /* Initializations of structure parameters for 256 point FFT */
+ S->twidCoefModifier = 16U;
+ S->bitRevFactor = 16U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+ S->onebyfftLen = 0.00390625f;
+ break;
+
+ case 64U:
+ /* Initializations of structure parameters for 64 point FFT */
+ S->twidCoefModifier = 64U;
+ S->bitRevFactor = 64U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+ S->onebyfftLen = 0.015625f;
+ break;
+
+ case 16U:
+ /* Initializations of structure parameters for 16 point FFT */
+ S->twidCoefModifier = 256U;
+ S->bitRevFactor = 256U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+ S->onebyfftLen = 0.0625f;
+ break;
+
+
+ default:
+ /* Reporting argument error if fftSize is not valid value */
+ status = ARM_MATH_ARGUMENT_ERROR;
+ break;
+ }
+
+ return (status);
+}
+
+/**
+ @} end of ComplexFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q15.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q15.c index 7774243..1d83009 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q15.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q15.c @@ -1,157 +1,145 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix4_init_q15.c - * Description: Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -/** - @ingroup groupTransforms - */ - - -/** - @addtogroup ComplexFFT - @{ - */ - - -/** - @brief Initialization function for the Q15 CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future. - @param[in,out] S points to an instance of the Q15 CFFT/CIFFT structure - @param[in] fftLen length of the FFT - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length - - @par Details - The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed. - Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated - @par - The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order. - Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order. - @par - The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024. - @par - This Function also initializes Twiddle factor table pointer and Bit reversal table pointer. - */ - -arm_status arm_cfft_radix4_init_q15( - arm_cfft_radix4_instance_q15 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - /* Initialise the default arm status */ - arm_status status = ARM_MATH_ARGUMENT_ERROR; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) - - /* Initialise the default arm status */ - status = ARM_MATH_SUCCESS; - /* Initialise the FFT length */ - S->fftLen = fftLen; - /* Initialise the Twiddle coefficient pointer */ - S->pTwiddle = (q15_t *) twiddleCoef_4096_q15; - /* Initialise the Flag for selection of CFFT or CIFFT */ - S->ifftFlag = ifftFlag; - /* Initialise the Flag for calculation Bit reversal or not */ - S->bitReverseFlag = bitReverseFlag; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024) - - /* Initializations of structure parameters depending on the FFT length */ - switch (S->fftLen) - { - case 4096U: - /* Initializations of structure parameters for 4096 point FFT */ - - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 1U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 1U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) armBitRevTable; - - break; - - case 1024U: - /* Initializations of structure parameters for 1024 point FFT */ - S->twidCoefModifier = 4U; - S->bitRevFactor = 4U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[3]; - - break; - - case 256U: - /* Initializations of structure parameters for 256 point FFT */ - S->twidCoefModifier = 16U; - S->bitRevFactor = 16U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[15]; - - break; - - case 64U: - /* Initializations of structure parameters for 64 point FFT */ - S->twidCoefModifier = 64U; - S->bitRevFactor = 64U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[63]; - - break; - - case 16U: - /* Initializations of structure parameters for 16 point FFT */ - S->twidCoefModifier = 256U; - S->bitRevFactor = 256U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[255]; - - break; - - default: - /* Reporting argument error if fftSize is not valid value */ - status = ARM_MATH_ARGUMENT_ERROR; - break; - } - -#endif -#endif -#endif - return (status); -} - -/** - @} end of ComplexFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix4_init_q15.c
+ * Description: Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @ingroup groupTransforms
+ */
+
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+
+/**
+ @brief Initialization function for the Q15 CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
+ @param[in,out] S points to an instance of the Q15 CFFT/CIFFT structure
+ @param[in] fftLen length of the FFT
+ @param[in] ifftFlag flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+ @par Details
+ The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+ Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
+ @par
+ The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+ Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+ @par
+ The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
+ @par
+ This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+ */
+
+arm_status arm_cfft_radix4_init_q15(
+ arm_cfft_radix4_instance_q15 * S,
+ uint16_t fftLen,
+ uint8_t ifftFlag,
+ uint8_t bitReverseFlag)
+{
+ /* Initialise the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+ /* Initialise the FFT length */
+ S->fftLen = fftLen;
+ /* Initialise the Twiddle coefficient pointer */
+ S->pTwiddle = (q15_t *) twiddleCoef_4096_q15;
+ /* Initialise the Flag for selection of CFFT or CIFFT */
+ S->ifftFlag = ifftFlag;
+ /* Initialise the Flag for calculation Bit reversal or not */
+ S->bitReverseFlag = bitReverseFlag;
+
+ /* Initializations of structure parameters depending on the FFT length */
+ switch (S->fftLen)
+ {
+ case 4096U:
+ /* Initializations of structure parameters for 4096 point FFT */
+
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 1U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 1U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) armBitRevTable;
+
+ break;
+
+ case 1024U:
+ /* Initializations of structure parameters for 1024 point FFT */
+ S->twidCoefModifier = 4U;
+ S->bitRevFactor = 4U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+
+ break;
+
+ case 256U:
+ /* Initializations of structure parameters for 256 point FFT */
+ S->twidCoefModifier = 16U;
+ S->bitRevFactor = 16U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+
+ break;
+
+ case 64U:
+ /* Initializations of structure parameters for 64 point FFT */
+ S->twidCoefModifier = 64U;
+ S->bitRevFactor = 64U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+
+ break;
+
+ case 16U:
+ /* Initializations of structure parameters for 16 point FFT */
+ S->twidCoefModifier = 256U;
+ S->bitRevFactor = 256U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+
+ break;
+
+ default:
+ /* Reporting argument error if fftSize is not valid value */
+ status = ARM_MATH_ARGUMENT_ERROR;
+ break;
+ }
+
+ return (status);
+}
+
+/**
+ @} end of ComplexFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q31.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q31.c index 04ba393..964b4c8 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q31.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q31.c @@ -1,154 +1,141 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix4_init_q31.c - * Description: Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - - @brief Initialization function for the Q31 CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future. - @param[in,out] S points to an instance of the Q31 CFFT/CIFFT structure. - @param[in] fftLen length of the FFT. - @param[in] ifftFlag flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length - - @par Details - The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed. - Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated - @par - The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order. - Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order. - @par - The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024. - @par - This Function also initializes Twiddle factor table pointer and Bit reversal table pointer. -*/ - -arm_status arm_cfft_radix4_init_q31( - arm_cfft_radix4_instance_q31 * S, - uint16_t fftLen, - uint8_t ifftFlag, - uint8_t bitReverseFlag) -{ - - /* Initialise the default arm status */ - arm_status status = ARM_MATH_ARGUMENT_ERROR; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) - - /* Initialise the default arm status */ - status = ARM_MATH_SUCCESS; - /* Initialise the FFT length */ - S->fftLen = fftLen; - /* Initialise the Twiddle coefficient pointer */ - S->pTwiddle = (q31_t *) twiddleCoef_4096_q31; - /* Initialise the Flag for selection of CFFT or CIFFT */ - S->ifftFlag = ifftFlag; - /* Initialise the Flag for calculation Bit reversal or not */ - S->bitReverseFlag = bitReverseFlag; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024) - - /* Initializations of Instance structure depending on the FFT length */ - switch (S->fftLen) - { - /* Initializations of structure parameters for 4096 point FFT */ - case 4096U: - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 1U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 1U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) armBitRevTable; - break; - - /* Initializations of structure parameters for 1024 point FFT */ - case 1024U: - /* Initialise the twiddle coef modifier value */ - S->twidCoefModifier = 4U; - /* Initialise the bit reversal table modifier */ - S->bitRevFactor = 4U; - /* Initialise the bit reversal table pointer */ - S->pBitRevTable = (uint16_t *) & armBitRevTable[3]; - break; - - case 256U: - /* Initializations of structure parameters for 256 point FFT */ - S->twidCoefModifier = 16U; - S->bitRevFactor = 16U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[15]; - break; - - case 64U: - /* Initializations of structure parameters for 64 point FFT */ - S->twidCoefModifier = 64U; - S->bitRevFactor = 64U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[63]; - break; - - case 16U: - /* Initializations of structure parameters for 16 point FFT */ - S->twidCoefModifier = 256U; - S->bitRevFactor = 256U; - S->pBitRevTable = (uint16_t *) & armBitRevTable[255]; - break; - - default: - /* Reporting argument error if fftSize is not valid value */ - status = ARM_MATH_ARGUMENT_ERROR; - break; - } - -#endif -#endif -#endif - return (status); -} - -/** - @} end of ComplexFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix4_init_q31.c
+ * Description: Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+
+ @brief Initialization function for the Q31 CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
+ @param[in,out] S points to an instance of the Q31 CFFT/CIFFT structure.
+ @param[in] fftLen length of the FFT.
+ @param[in] ifftFlag flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+ @par Details
+ The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+ Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
+ @par
+ The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+ Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+ @par
+ The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
+ @par
+ This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+*/
+
+arm_status arm_cfft_radix4_init_q31(
+ arm_cfft_radix4_instance_q31 * S,
+ uint16_t fftLen,
+ uint8_t ifftFlag,
+ uint8_t bitReverseFlag)
+{
+ /* Initialise the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+ /* Initialise the FFT length */
+ S->fftLen = fftLen;
+ /* Initialise the Twiddle coefficient pointer */
+ S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
+ /* Initialise the Flag for selection of CFFT or CIFFT */
+ S->ifftFlag = ifftFlag;
+ /* Initialise the Flag for calculation Bit reversal or not */
+ S->bitReverseFlag = bitReverseFlag;
+
+ /* Initializations of Instance structure depending on the FFT length */
+ switch (S->fftLen)
+ {
+ /* Initializations of structure parameters for 4096 point FFT */
+ case 4096U:
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 1U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 1U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) armBitRevTable;
+ break;
+
+ /* Initializations of structure parameters for 1024 point FFT */
+ case 1024U:
+ /* Initialise the twiddle coef modifier value */
+ S->twidCoefModifier = 4U;
+ /* Initialise the bit reversal table modifier */
+ S->bitRevFactor = 4U;
+ /* Initialise the bit reversal table pointer */
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+ break;
+
+ case 256U:
+ /* Initializations of structure parameters for 256 point FFT */
+ S->twidCoefModifier = 16U;
+ S->bitRevFactor = 16U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+ break;
+
+ case 64U:
+ /* Initializations of structure parameters for 64 point FFT */
+ S->twidCoefModifier = 64U;
+ S->bitRevFactor = 64U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+ break;
+
+ case 16U:
+ /* Initializations of structure parameters for 16 point FFT */
+ S->twidCoefModifier = 256U;
+ S->bitRevFactor = 256U;
+ S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+ break;
+
+ default:
+ /* Reporting argument error if fftSize is not valid value */
+ status = ARM_MATH_ARGUMENT_ERROR;
+ break;
+ }
+
+ return (status);
+}
+
+/**
+ @} end of ComplexFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c index 280acca..825a16b 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c @@ -1,1809 +1,1809 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix4_q15.c - * Description: This file has function definition of Radix-4 FFT & IFFT function and - * In-place bit reversal using bit reversal table - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - - -void arm_radix4_butterfly_q15( - q15_t * pSrc16, - uint32_t fftLen, - const q15_t * pCoef16, - uint32_t twidCoefModifier); - -void arm_radix4_butterfly_inverse_q15( - q15_t * pSrc16, - uint32_t fftLen, - const q15_t * pCoef16, - uint32_t twidCoefModifier); - -void arm_bitreversal_q15( - q15_t * pSrc, - uint32_t fftLen, - uint16_t bitRevFactor, - const uint16_t * pBitRevTab); - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - - -/** - @brief Processing function for the Q15 CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future. - @param[in] S points to an instance of the Q15 CFFT/CIFFT structure. - @param[in,out] pSrc points to the complex data buffer. Processing occurs in-place. - @return none - - @par Input and output formats: - Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. - Hence the output format is different for different FFT sizes. - The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: - @par - \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT" - \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT" - */ - -void arm_cfft_radix4_q15( - const arm_cfft_radix4_instance_q15 * S, - q15_t * pSrc) -{ - if (S->ifftFlag == 1U) - { - /* Complex IFFT radix-4 */ - arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); - } - else - { - /* Complex FFT radix-4 */ - arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); - } - - if (S->bitReverseFlag == 1U) - { - /* Bit Reversal */ - arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); - } - -} - -/** - @} end of ComplexFFT group - */ - -/* - * Radix-4 FFT algorithm used is : - * - * Input real and imaginary data: - * x(n) = xa + j * ya - * x(n+N/4 ) = xb + j * yb - * x(n+N/2 ) = xc + j * yc - * x(n+3N 4) = xd + j * yd - * - * - * Output real and imaginary data: - * x(4r) = xa'+ j * ya' - * x(4r+1) = xb'+ j * yb' - * x(4r+2) = xc'+ j * yc' - * x(4r+3) = xd'+ j * yd' - * - * - * Twiddle factors for radix-4 FFT: - * Wn = co1 + j * (- si1) - * W2n = co2 + j * (- si2) - * W3n = co3 + j * (- si3) - - * The real and imaginary output values for the radix-4 butterfly are - * xa' = xa + xb + xc + xd - * ya' = ya + yb + yc + yd - * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) - * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) - * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) - * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) - * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) - * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) - * - */ - -/** - @brief Core function for the Q15 CFFT butterfly process. - @param[in,out] pSrc16 points to the in-place buffer of Q15 data type - @param[in] fftLen length of the FFT - @param[in] pCoef16 points to twiddle coefficient buffer - @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table - @return none - */ - -void arm_radix4_butterfly_q15( - q15_t * pSrc16, - uint32_t fftLen, - const q15_t * pCoef16, - uint32_t twidCoefModifier) -{ - -#if defined (ARM_MATH_DSP) - - q31_t R, S, T, U; - q31_t C1, C2, C3, out1, out2; - uint32_t n1, n2, ic, i0, j, k; - - q15_t *ptr1; - q15_t *pSi0; - q15_t *pSi1; - q15_t *pSi2; - q15_t *pSi3; - - q31_t xaya, xbyb, xcyc, xdyd; - - /* Total process is divided into three stages */ - - /* process first stage, middle stages, & last stage */ - - /* Initializations for the first stage */ - n2 = fftLen; - n1 = n2; - - /* n2 = fftLen/4 */ - n2 >>= 2U; - - /* Index for twiddle coefficient */ - ic = 0U; - - /* Index for input read and output write */ - j = n2; - - pSi0 = pSrc16; - pSi1 = pSi0 + 2 * n2; - pSi2 = pSi1 + 2 * n2; - pSi3 = pSi2 + 2 * n2; - - /* Input is in 1.15(q15) format */ - - /* start of first stage process */ - do - { - /* Butterfly implementation */ - - /* Reading i0, i0+fftLen/2 inputs */ - /* Read ya (real), xa(imag) input */ - T = read_q15x2 (pSi0); - T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */ - T = __SHADD16(T, 0); /* it turns out doing this twice is 2 cycles, the alternative takes 3 cycles */ -/* - in = ((int16_t) (T & 0xFFFF)) >> 2; // alternative code that takes 3 cycles - T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); -*/ - - /* Read yc (real), xc(imag) input */ - S = read_q15x2 (pSi2); - S = __SHADD16(S, 0); - S = __SHADD16(S, 0); - - /* R = packed((ya + yc), (xa + xc) ) */ - R = __QADD16(T, S); - - /* S = packed((ya - yc), (xa - xc) ) */ - S = __QSUB16(T, S); - - /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ - /* Read yb (real), xb(imag) input */ - T = read_q15x2 (pSi1); - T = __SHADD16(T, 0); - T = __SHADD16(T, 0); - - /* Read yd (real), xd(imag) input */ - U = read_q15x2 (pSi3); - U = __SHADD16(U, 0); - U = __SHADD16(U, 0); - - /* T = packed((yb + yd), (xb + xd) ) */ - T = __QADD16(T, U); - - /* writing the butterfly processed i0 sample */ - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - write_q15x2_ia (&pSi0, __SHADD16(R, T)); - - /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ - R = __QSUB16(R, T); - - /* co2 & si2 are read from SIMD Coefficient pointer */ - C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic)); - -#ifndef ARM_MATH_BIG_ENDIAN - /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ - out1 = __SMUAD(C2, R) >> 16U; - /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - out2 = __SMUSDX(C2, R); -#else - /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - out1 = __SMUSDX(R, C2) >> 16U; - /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ - out2 = __SMUAD(C2, R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* Reading i0+fftLen/4 */ - /* T = packed(yb, xb) */ - T = read_q15x2 (pSi1); - T = __SHADD16(T, 0); - T = __SHADD16(T, 0); - - /* writing the butterfly processed i0 + fftLen/4 sample */ - /* writing output(xc', yc') in little endian format */ - write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 )); - - /* Butterfly calculations */ - /* U = packed(yd, xd) */ - U = read_q15x2 (pSi3); - U = __SHADD16(U, 0); - U = __SHADD16(U, 0); - - /* T = packed(yb-yd, xb-xd) */ - T = __QSUB16(T, U); - -#ifndef ARM_MATH_BIG_ENDIAN - /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ - R = __QASX(S, T); - /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ - S = __QSAX(S, T); -#else - /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ - R = __QSAX(S, T); - /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ - S = __QASX(S, T); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* co1 & si1 are read from SIMD Coefficient pointer */ - C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic)); - /* Butterfly process for the i0+fftLen/2 sample */ - -#ifndef ARM_MATH_BIG_ENDIAN - /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ - out1 = __SMUAD(C1, S) >> 16U; - /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ - out2 = __SMUSDX(C1, S); -#else - /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ - out1 = __SMUSDX(S, C1) >> 16U; - /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ - out2 = __SMUAD(C1, S); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* writing output(xb', yb') in little endian format */ - write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 )); - - /* co3 & si3 are read from SIMD Coefficient pointer */ - C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic)); - /* Butterfly process for the i0+3fftLen/4 sample */ - -#ifndef ARM_MATH_BIG_ENDIAN - /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ - out1 = __SMUAD(C3, R) >> 16U; - /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ - out2 = __SMUSDX(C3, R); -#else - /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ - out1 = __SMUSDX(R, C3) >> 16U; - /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ - out2 = __SMUAD(C3, R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* writing output(xd', yd') in little endian format */ - write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 )); - - /* Twiddle coefficients index modifier */ - ic = ic + twidCoefModifier; - - } while (--j); - /* data is in 4.11(q11) format */ - - /* end of first stage process */ - - - /* start of middle stage process */ - - /* Twiddle coefficients index modifier */ - twidCoefModifier <<= 2U; - - /* Calculation of Middle stage */ - for (k = fftLen / 4U; k > 4U; k >>= 2U) - { - /* Initializations for the middle stage */ - n1 = n2; - n2 >>= 2U; - ic = 0U; - - for (j = 0U; j <= (n2 - 1U); j++) - { - /* index calculation for the coefficients */ - C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic)); - C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic)); - C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic)); - - /* Twiddle coefficients index modifier */ - ic = ic + twidCoefModifier; - - pSi0 = pSrc16 + 2 * j; - pSi1 = pSi0 + 2 * n2; - pSi2 = pSi1 + 2 * n2; - pSi3 = pSi2 + 2 * n2; - - /* Butterfly implementation */ - for (i0 = j; i0 < fftLen; i0 += n1) - { - /* Reading i0, i0+fftLen/2 inputs */ - /* Read ya (real), xa(imag) input */ - T = read_q15x2 (pSi0); - - /* Read yc (real), xc(imag) input */ - S = read_q15x2 (pSi2); - - /* R = packed( (ya + yc), (xa + xc)) */ - R = __QADD16(T, S); - - /* S = packed((ya - yc), (xa - xc)) */ - S = __QSUB16(T, S); - - /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ - /* Read yb (real), xb(imag) input */ - T = read_q15x2 (pSi1); - - /* Read yd (real), xd(imag) input */ - U = read_q15x2 (pSi3); - - /* T = packed( (yb + yd), (xb + xd)) */ - T = __QADD16(T, U); - - /* writing the butterfly processed i0 sample */ - - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - out1 = __SHADD16(R, T); - out1 = __SHADD16(out1, 0); - write_q15x2 (pSi0, out1); - pSi0 += 2 * n1; - - /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ - R = __SHSUB16(R, T); - -#ifndef ARM_MATH_BIG_ENDIAN - /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ - out1 = __SMUAD(C2, R) >> 16U; - - /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - out2 = __SMUSDX(C2, R); -#else - /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - out1 = __SMUSDX(R, C2) >> 16U; - - /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ - out2 = __SMUAD(C2, R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* Reading i0+3fftLen/4 */ - /* Read yb (real), xb(imag) input */ - T = read_q15x2 (pSi1); - - /* writing the butterfly processed i0 + fftLen/4 sample */ - /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ - /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - write_q15x2 (pSi1, __PKHBT( out1, out2, 0 )); - pSi1 += 2 * n1; - - /* Butterfly calculations */ - - /* Read yd (real), xd(imag) input */ - U = read_q15x2 (pSi3); - - /* T = packed(yb-yd, xb-xd) */ - T = __QSUB16(T, U); - -#ifndef ARM_MATH_BIG_ENDIAN - /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ - R = __SHASX(S, T); - - /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ - S = __SHSAX(S, T); - - - /* Butterfly process for the i0+fftLen/2 sample */ - out1 = __SMUAD(C1, S) >> 16U; - out2 = __SMUSDX(C1, S); -#else - /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ - R = __SHSAX(S, T); - - /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ - S = __SHASX(S, T); - - - /* Butterfly process for the i0+fftLen/2 sample */ - out1 = __SMUSDX(S, C1) >> 16U; - out2 = __SMUAD(C1, S); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ - /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ - write_q15x2 (pSi2, __PKHBT( out1, out2, 0 )); - pSi2 += 2 * n1; - - /* Butterfly process for the i0+3fftLen/4 sample */ - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUAD(C3, R) >> 16U; - out2 = __SMUSDX(C3, R); -#else - out1 = __SMUSDX(R, C3) >> 16U; - out2 = __SMUAD(C3, R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ - /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ - write_q15x2 (pSi3, __PKHBT( out1, out2, 0 )); - pSi3 += 2 * n1; - } - } - /* Twiddle coefficients index modifier */ - twidCoefModifier <<= 2U; - } - /* end of middle stage process */ - - - /* data is in 10.6(q6) format for the 1024 point */ - /* data is in 8.8(q8) format for the 256 point */ - /* data is in 6.10(q10) format for the 64 point */ - /* data is in 4.12(q12) format for the 16 point */ - - /* Initializations for the last stage */ - j = fftLen >> 2; - - ptr1 = &pSrc16[0]; - - /* start of last stage process */ - - /* Butterfly implementation */ - do - { - /* Read xa (real), ya(imag) input */ - xaya = read_q15x2_ia (&ptr1); - - /* Read xb (real), yb(imag) input */ - xbyb = read_q15x2_ia (&ptr1); - - /* Read xc (real), yc(imag) input */ - xcyc = read_q15x2_ia (&ptr1); - - /* Read xd (real), yd(imag) input */ - xdyd = read_q15x2_ia (&ptr1); - - /* R = packed((ya + yc), (xa + xc)) */ - R = __QADD16(xaya, xcyc); - - /* T = packed((yb + yd), (xb + xd)) */ - T = __QADD16(xbyb, xdyd); - - /* pointer updation for writing */ - ptr1 = ptr1 - 8U; - - - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - write_q15x2_ia (&ptr1, __SHADD16(R, T)); - - /* T = packed((yb + yd), (xb + xd)) */ - T = __QADD16(xbyb, xdyd); - - /* xc' = (xa-xb+xc-xd) */ - /* yc' = (ya-yb+yc-yd) */ - write_q15x2_ia (&ptr1, __SHSUB16(R, T)); - - /* S = packed((ya - yc), (xa - xc)) */ - S = __QSUB16(xaya, xcyc); - - /* Read yd (real), xd(imag) input */ - /* T = packed( (yb - yd), (xb - xd)) */ - U = __QSUB16(xbyb, xdyd); - -#ifndef ARM_MATH_BIG_ENDIAN - /* xb' = (xa+yb-xc-yd) */ - /* yb' = (ya-xb-yc+xd) */ - write_q15x2_ia (&ptr1, __SHSAX(S, U)); - - /* xd' = (xa-yb-xc+yd) */ - /* yd' = (ya+xb-yc-xd) */ - write_q15x2_ia (&ptr1, __SHASX(S, U)); -#else - /* xb' = (xa+yb-xc-yd) */ - /* yb' = (ya-xb-yc+xd) */ - write_q15x2_ia (&ptr1, __SHASX(S, U)); - - /* xd' = (xa-yb-xc+yd) */ - /* yd' = (ya+xb-yc-xd) */ - write_q15x2_ia (&ptr1, __SHSAX(S, U)); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - } while (--j); - - /* end of last stage process */ - - /* output is in 11.5(q5) format for the 1024 point */ - /* output is in 9.7(q7) format for the 256 point */ - /* output is in 7.9(q9) format for the 64 point */ - /* output is in 5.11(q11) format for the 16 point */ - - -#else /* #if defined (ARM_MATH_DSP) */ - - q15_t R0, R1, S0, S1, T0, T1, U0, U1; - q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; - uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; - - /* Total process is divided into three stages */ - - /* process first stage, middle stages, & last stage */ - - /* Initializations for the first stage */ - n2 = fftLen; - n1 = n2; - - /* n2 = fftLen/4 */ - n2 >>= 2U; - - /* Index for twiddle coefficient */ - ic = 0U; - - /* Index for input read and output write */ - i0 = 0U; - j = n2; - - /* Input is in 1.15(q15) format */ - - /* start of first stage process */ - do - { - /* Butterfly implementation */ - - /* index calculation for the input as, */ - /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Reading i0, i0+fftLen/2 inputs */ - - /* input is down scale by 4 to avoid overflow */ - /* Read ya (real), xa(imag) input */ - T0 = pSrc16[i0 * 2U] >> 2U; - T1 = pSrc16[(i0 * 2U) + 1U] >> 2U; - - /* input is down scale by 4 to avoid overflow */ - /* Read yc (real), xc(imag) input */ - S0 = pSrc16[i2 * 2U] >> 2U; - S1 = pSrc16[(i2 * 2U) + 1U] >> 2U; - - /* R0 = (ya + yc) */ - R0 = __SSAT(T0 + S0, 16U); - /* R1 = (xa + xc) */ - R1 = __SSAT(T1 + S1, 16U); - - /* S0 = (ya - yc) */ - S0 = __SSAT(T0 - S0, 16); - /* S1 = (xa - xc) */ - S1 = __SSAT(T1 - S1, 16); - - /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ - /* input is down scale by 4 to avoid overflow */ - /* Read yb (real), xb(imag) input */ - T0 = pSrc16[i1 * 2U] >> 2U; - T1 = pSrc16[(i1 * 2U) + 1U] >> 2U; - - /* input is down scale by 4 to avoid overflow */ - /* Read yd (real), xd(imag) input */ - U0 = pSrc16[i3 * 2U] >> 2U; - U1 = pSrc16[(i3 * 2U) + 1] >> 2U; - - /* T0 = (yb + yd) */ - T0 = __SSAT(T0 + U0, 16U); - /* T1 = (xb + xd) */ - T1 = __SSAT(T1 + U1, 16U); - - /* writing the butterfly processed i0 sample */ - /* ya' = ya + yb + yc + yd */ - /* xa' = xa + xb + xc + xd */ - pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); - pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); - - /* R0 = (ya + yc) - (yb + yd) */ - /* R1 = (xa + xc) - (xb + xd) */ - R0 = __SSAT(R0 - T0, 16U); - R1 = __SSAT(R1 - T1, 16U); - - /* co2 & si2 are read from Coefficient pointer */ - Co2 = pCoef16[2U * ic * 2U]; - Si2 = pCoef16[(2U * ic * 2U) + 1]; - - /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ - out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U); - /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U); - - /* Reading i0+fftLen/4 */ - /* input is down scale by 4 to avoid overflow */ - /* T0 = yb, T1 = xb */ - T0 = pSrc16[i1 * 2U] >> 2; - T1 = pSrc16[(i1 * 2U) + 1] >> 2; - - /* writing the butterfly processed i0 + fftLen/4 sample */ - /* writing output(xc', yc') in little endian format */ - pSrc16[i1 * 2U] = out1; - pSrc16[(i1 * 2U) + 1] = out2; - - /* Butterfly calculations */ - /* input is down scale by 4 to avoid overflow */ - /* U0 = yd, U1 = xd */ - U0 = pSrc16[i3 * 2U] >> 2; - U1 = pSrc16[(i3 * 2U) + 1] >> 2; - /* T0 = yb-yd */ - T0 = __SSAT(T0 - U0, 16); - /* T1 = xb-xd */ - T1 = __SSAT(T1 - U1, 16); - - /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */ - R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16); - R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16); - - /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */ - S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16U); - S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16U); - - /* co1 & si1 are read from Coefficient pointer */ - Co1 = pCoef16[ic * 2U]; - Si1 = pCoef16[(ic * 2U) + 1]; - /* Butterfly process for the i0+fftLen/2 sample */ - /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ - out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16); - /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ - out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16); - - /* writing output(xb', yb') in little endian format */ - pSrc16[i2 * 2U] = out1; - pSrc16[(i2 * 2U) + 1] = out2; - - /* Co3 & si3 are read from Coefficient pointer */ - Co3 = pCoef16[3U * (ic * 2U)]; - Si3 = pCoef16[(3U * (ic * 2U)) + 1]; - /* Butterfly process for the i0+3fftLen/4 sample */ - /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ - out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U); - /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ - out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U); - /* writing output(xd', yd') in little endian format */ - pSrc16[i3 * 2U] = out1; - pSrc16[(i3 * 2U) + 1] = out2; - - /* Twiddle coefficients index modifier */ - ic = ic + twidCoefModifier; - - /* Updating input index */ - i0 = i0 + 1U; - - } while (--j); - /* data is in 4.11(q11) format */ - - /* end of first stage process */ - - - /* start of middle stage process */ - - /* Twiddle coefficients index modifier */ - twidCoefModifier <<= 2U; - - /* Calculation of Middle stage */ - for (k = fftLen / 4U; k > 4U; k >>= 2U) - { - /* Initializations for the middle stage */ - n1 = n2; - n2 >>= 2U; - ic = 0U; - - for (j = 0U; j <= (n2 - 1U); j++) - { - /* index calculation for the coefficients */ - Co1 = pCoef16[ic * 2U]; - Si1 = pCoef16[(ic * 2U) + 1U]; - Co2 = pCoef16[2U * (ic * 2U)]; - Si2 = pCoef16[(2U * (ic * 2U)) + 1U]; - Co3 = pCoef16[3U * (ic * 2U)]; - Si3 = pCoef16[(3U * (ic * 2U)) + 1U]; - - /* Twiddle coefficients index modifier */ - ic = ic + twidCoefModifier; - - /* Butterfly implementation */ - for (i0 = j; i0 < fftLen; i0 += n1) - { - /* index calculation for the input as, */ - /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Reading i0, i0+fftLen/2 inputs */ - /* Read ya (real), xa(imag) input */ - T0 = pSrc16[i0 * 2U]; - T1 = pSrc16[(i0 * 2U) + 1U]; - - /* Read yc (real), xc(imag) input */ - S0 = pSrc16[i2 * 2U]; - S1 = pSrc16[(i2 * 2U) + 1U]; - - /* R0 = (ya + yc), R1 = (xa + xc) */ - R0 = __SSAT(T0 + S0, 16); - R1 = __SSAT(T1 + S1, 16); - - /* S0 = (ya - yc), S1 =(xa - xc) */ - S0 = __SSAT(T0 - S0, 16); - S1 = __SSAT(T1 - S1, 16); - - /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ - /* Read yb (real), xb(imag) input */ - T0 = pSrc16[i1 * 2U]; - T1 = pSrc16[(i1 * 2U) + 1U]; - - /* Read yd (real), xd(imag) input */ - U0 = pSrc16[i3 * 2U]; - U1 = pSrc16[(i3 * 2U) + 1U]; - - - /* T0 = (yb + yd), T1 = (xb + xd) */ - T0 = __SSAT(T0 + U0, 16); - T1 = __SSAT(T1 + U1, 16); - - /* writing the butterfly processed i0 sample */ - - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - out1 = ((R0 >> 1U) + (T0 >> 1U)) >> 1U; - out2 = ((R1 >> 1U) + (T1 >> 1U)) >> 1U; - - pSrc16[i0 * 2U] = out1; - pSrc16[(2U * i0) + 1U] = out2; - - /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ - R0 = (R0 >> 1U) - (T0 >> 1U); - R1 = (R1 >> 1U) - (T1 >> 1U); - - /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ - out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U); - - /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U); - - /* Reading i0+3fftLen/4 */ - /* Read yb (real), xb(imag) input */ - T0 = pSrc16[i1 * 2U]; - T1 = pSrc16[(i1 * 2U) + 1U]; - - /* writing the butterfly processed i0 + fftLen/4 sample */ - /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ - /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - pSrc16[i1 * 2U] = out1; - pSrc16[(i1 * 2U) + 1U] = out2; - - /* Butterfly calculations */ - - /* Read yd (real), xd(imag) input */ - U0 = pSrc16[i3 * 2U]; - U1 = pSrc16[(i3 * 2U) + 1U]; - - /* T0 = yb-yd, T1 = xb-xd */ - T0 = __SSAT(T0 - U0, 16); - T1 = __SSAT(T1 - U1, 16); - - /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */ - R0 = (S0 >> 1U) - (T1 >> 1U); - R1 = (S1 >> 1U) + (T0 >> 1U); - - /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */ - S0 = (S0 >> 1U) + (T1 >> 1U); - S1 = (S1 >> 1U) - (T0 >> 1U); - - /* Butterfly process for the i0+fftLen/2 sample */ - out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16U); - - out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16U); - - /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ - /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ - pSrc16[i2 * 2U] = out1; - pSrc16[(i2 * 2U) + 1U] = out2; - - /* Butterfly process for the i0+3fftLen/4 sample */ - out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U); - - out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U); - /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */ - /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */ - pSrc16[i3 * 2U] = out1; - pSrc16[(i3 * 2U) + 1U] = out2; - } - } - /* Twiddle coefficients index modifier */ - twidCoefModifier <<= 2U; - } - /* end of middle stage process */ - - - /* data is in 10.6(q6) format for the 1024 point */ - /* data is in 8.8(q8) format for the 256 point */ - /* data is in 6.10(q10) format for the 64 point */ - /* data is in 4.12(q12) format for the 16 point */ - - /* Initializations for the last stage */ - n1 = n2; - n2 >>= 2U; - - /* start of last stage process */ - - /* Butterfly implementation */ - for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1) - { - /* index calculation for the input as, */ - /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Reading i0, i0+fftLen/2 inputs */ - /* Read ya (real), xa(imag) input */ - T0 = pSrc16[i0 * 2U]; - T1 = pSrc16[(i0 * 2U) + 1U]; - - /* Read yc (real), xc(imag) input */ - S0 = pSrc16[i2 * 2U]; - S1 = pSrc16[(i2 * 2U) + 1U]; - - /* R0 = (ya + yc), R1 = (xa + xc) */ - R0 = __SSAT(T0 + S0, 16U); - R1 = __SSAT(T1 + S1, 16U); - - /* S0 = (ya - yc), S1 = (xa - xc) */ - S0 = __SSAT(T0 - S0, 16U); - S1 = __SSAT(T1 - S1, 16U); - - /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ - /* Read yb (real), xb(imag) input */ - T0 = pSrc16[i1 * 2U]; - T1 = pSrc16[(i1 * 2U) + 1U]; - /* Read yd (real), xd(imag) input */ - U0 = pSrc16[i3 * 2U]; - U1 = pSrc16[(i3 * 2U) + 1U]; - - /* T0 = (yb + yd), T1 = (xb + xd)) */ - T0 = __SSAT(T0 + U0, 16U); - T1 = __SSAT(T1 + U1, 16U); - - /* writing the butterfly processed i0 sample */ - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); - pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); - - /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ - R0 = (R0 >> 1U) - (T0 >> 1U); - R1 = (R1 >> 1U) - (T1 >> 1U); - /* Read yb (real), xb(imag) input */ - T0 = pSrc16[i1 * 2U]; - T1 = pSrc16[(i1 * 2U) + 1U]; - - /* writing the butterfly processed i0 + fftLen/4 sample */ - /* xc' = (xa-xb+xc-xd) */ - /* yc' = (ya-yb+yc-yd) */ - pSrc16[i1 * 2U] = R0; - pSrc16[(i1 * 2U) + 1U] = R1; - - /* Read yd (real), xd(imag) input */ - U0 = pSrc16[i3 * 2U]; - U1 = pSrc16[(i3 * 2U) + 1U]; - /* T0 = (yb - yd), T1 = (xb - xd) */ - T0 = __SSAT(T0 - U0, 16U); - T1 = __SSAT(T1 - U1, 16U); - - /* writing the butterfly processed i0 + fftLen/2 sample */ - /* xb' = (xa+yb-xc-yd) */ - /* yb' = (ya-xb-yc+xd) */ - pSrc16[i2 * 2U] = (S0 >> 1U) + (T1 >> 1U); - pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U); - - /* writing the butterfly processed i0 + 3fftLen/4 sample */ - /* xd' = (xa-yb-xc+yd) */ - /* yd' = (ya+xb-yc-xd) */ - pSrc16[i3 * 2U] = (S0 >> 1U) - (T1 >> 1U); - pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U); - - } - - /* end of last stage process */ - - /* output is in 11.5(q5) format for the 1024 point */ - /* output is in 9.7(q7) format for the 256 point */ - /* output is in 7.9(q9) format for the 64 point */ - /* output is in 5.11(q11) format for the 16 point */ - -#endif /* #if defined (ARM_MATH_DSP) */ - -} - - -/** - @brief Core function for the Q15 CIFFT butterfly process. - @param[in,out] pSrc16 points to the in-place buffer of Q15 data type - @param[in] fftLen length of the FFT - @param[in] pCoef16 points to twiddle coefficient buffer - @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. - @return none - */ - -/* - * Radix-4 IFFT algorithm used is : - * - * CIFFT uses same twiddle coefficients as CFFT function - * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] - * - * - * IFFT is implemented with following changes in equations from FFT - * - * Input real and imaginary data: - * x(n) = xa + j * ya - * x(n+N/4 ) = xb + j * yb - * x(n+N/2 ) = xc + j * yc - * x(n+3N 4) = xd + j * yd - * - * - * Output real and imaginary data: - * x(4r) = xa'+ j * ya' - * x(4r+1) = xb'+ j * yb' - * x(4r+2) = xc'+ j * yc' - * x(4r+3) = xd'+ j * yd' - * - * - * Twiddle factors for radix-4 IFFT: - * Wn = co1 + j * (si1) - * W2n = co2 + j * (si2) - * W3n = co3 + j * (si3) - - * The real and imaginary output values for the radix-4 butterfly are - * xa' = xa + xb + xc + xd - * ya' = ya + yb + yc + yd - * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) - * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) - * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) - * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) - * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) - * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) - * - */ - -void arm_radix4_butterfly_inverse_q15( - q15_t * pSrc16, - uint32_t fftLen, - const q15_t * pCoef16, - uint32_t twidCoefModifier) -{ - -#if defined (ARM_MATH_DSP) - - q31_t R, S, T, U; - q31_t C1, C2, C3, out1, out2; - uint32_t n1, n2, ic, i0, j, k; - - q15_t *ptr1; - q15_t *pSi0; - q15_t *pSi1; - q15_t *pSi2; - q15_t *pSi3; - - q31_t xaya, xbyb, xcyc, xdyd; - - /* Total process is divided into three stages */ - - /* process first stage, middle stages, & last stage */ - - /* Initializations for the first stage */ - n2 = fftLen; - n1 = n2; - - /* n2 = fftLen/4 */ - n2 >>= 2U; - - /* Index for twiddle coefficient */ - ic = 0U; - - /* Index for input read and output write */ - j = n2; - - pSi0 = pSrc16; - pSi1 = pSi0 + 2 * n2; - pSi2 = pSi1 + 2 * n2; - pSi3 = pSi2 + 2 * n2; - - /* Input is in 1.15(q15) format */ - - /* start of first stage process */ - do - { - /* Butterfly implementation */ - - /* Reading i0, i0+fftLen/2 inputs */ - /* Read ya (real), xa(imag) input */ - T = read_q15x2 (pSi0); - T = __SHADD16(T, 0); - T = __SHADD16(T, 0); - - /* Read yc (real), xc(imag) input */ - S = read_q15x2 (pSi2); - S = __SHADD16(S, 0); - S = __SHADD16(S, 0); - - /* R = packed((ya + yc), (xa + xc) ) */ - R = __QADD16(T, S); - - /* S = packed((ya - yc), (xa - xc) ) */ - S = __QSUB16(T, S); - - /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ - /* Read yb (real), xb(imag) input */ - T = read_q15x2 (pSi1); - T = __SHADD16(T, 0); - T = __SHADD16(T, 0); - - /* Read yd (real), xd(imag) input */ - U = read_q15x2 (pSi3); - U = __SHADD16(U, 0); - U = __SHADD16(U, 0); - - /* T = packed((yb + yd), (xb + xd) ) */ - T = __QADD16(T, U); - - /* writing the butterfly processed i0 sample */ - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - write_q15x2_ia (&pSi0, __SHADD16(R, T)); - - /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ - R = __QSUB16(R, T); - - /* co2 & si2 are read from SIMD Coefficient pointer */ - C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic)); - -#ifndef ARM_MATH_BIG_ENDIAN - /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ - out1 = __SMUSD(C2, R) >> 16U; - /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - out2 = __SMUADX(C2, R); -#else - /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - out1 = __SMUADX(C2, R) >> 16U; - /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ - out2 = __SMUSD(__QSUB16(0, C2), R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* Reading i0+fftLen/4 */ - /* T = packed(yb, xb) */ - T = read_q15x2 (pSi1); - T = __SHADD16(T, 0); - T = __SHADD16(T, 0); - - /* writing the butterfly processed i0 + fftLen/4 sample */ - /* writing output(xc', yc') in little endian format */ - write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 )); - - /* Butterfly calculations */ - /* U = packed(yd, xd) */ - U = read_q15x2 (pSi3); - U = __SHADD16(U, 0); - U = __SHADD16(U, 0); - - /* T = packed(yb-yd, xb-xd) */ - T = __QSUB16(T, U); - -#ifndef ARM_MATH_BIG_ENDIAN - /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ - R = __QSAX(S, T); - /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */ - S = __QASX(S, T); -#else - /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ - R = __QASX(S, T); - /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ - S = __QSAX(S, T); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* co1 & si1 are read from SIMD Coefficient pointer */ - C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic)); - /* Butterfly process for the i0+fftLen/2 sample */ - -#ifndef ARM_MATH_BIG_ENDIAN - /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ - out1 = __SMUSD(C1, S) >> 16U; - /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ - out2 = __SMUADX(C1, S); -#else - /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ - out1 = __SMUADX(C1, S) >> 16U; - /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ - out2 = __SMUSD(__QSUB16(0, C1), S); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* writing output(xb', yb') in little endian format */ - write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 )); - - /* co3 & si3 are read from SIMD Coefficient pointer */ - C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic)); - /* Butterfly process for the i0+3fftLen/4 sample */ - -#ifndef ARM_MATH_BIG_ENDIAN - /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ - out1 = __SMUSD(C3, R) >> 16U; - /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ - out2 = __SMUADX(C3, R); -#else - /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ - out1 = __SMUADX(C3, R) >> 16U; - /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ - out2 = __SMUSD(__QSUB16(0, C3), R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* writing output(xd', yd') in little endian format */ - write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 )); - - /* Twiddle coefficients index modifier */ - ic = ic + twidCoefModifier; - - } while (--j); - /* data is in 4.11(q11) format */ - - /* end of first stage process */ - - - /* start of middle stage process */ - - /* Twiddle coefficients index modifier */ - twidCoefModifier <<= 2U; - - /* Calculation of Middle stage */ - for (k = fftLen / 4U; k > 4U; k >>= 2U) - { - /* Initializations for the middle stage */ - n1 = n2; - n2 >>= 2U; - ic = 0U; - - for (j = 0U; j <= (n2 - 1U); j++) - { - /* index calculation for the coefficients */ - C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic)); - C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic)); - C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic)); - - /* Twiddle coefficients index modifier */ - ic = ic + twidCoefModifier; - - pSi0 = pSrc16 + 2 * j; - pSi1 = pSi0 + 2 * n2; - pSi2 = pSi1 + 2 * n2; - pSi3 = pSi2 + 2 * n2; - - /* Butterfly implementation */ - for (i0 = j; i0 < fftLen; i0 += n1) - { - /* Reading i0, i0+fftLen/2 inputs */ - /* Read ya (real), xa(imag) input */ - T = read_q15x2 (pSi0); - - /* Read yc (real), xc(imag) input */ - S = read_q15x2 (pSi2); - - /* R = packed( (ya + yc), (xa + xc)) */ - R = __QADD16(T, S); - - /* S = packed((ya - yc), (xa - xc)) */ - S = __QSUB16(T, S); - - /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ - /* Read yb (real), xb(imag) input */ - T = read_q15x2 (pSi1); - - /* Read yd (real), xd(imag) input */ - U = read_q15x2 (pSi3); - - /* T = packed( (yb + yd), (xb + xd)) */ - T = __QADD16(T, U); - - /* writing the butterfly processed i0 sample */ - - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - out1 = __SHADD16(R, T); - out1 = __SHADD16(out1, 0); - write_q15x2 (pSi0, out1); - pSi0 += 2 * n1; - - /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ - R = __SHSUB16(R, T); - -#ifndef ARM_MATH_BIG_ENDIAN - /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ - out1 = __SMUSD(C2, R) >> 16U; - - /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - out2 = __SMUADX(C2, R); -#else - /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - out1 = __SMUADX(R, C2) >> 16U; - - /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ - out2 = __SMUSD(__QSUB16(0, C2), R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* Reading i0+3fftLen/4 */ - /* Read yb (real), xb(imag) input */ - T = read_q15x2 (pSi1); - - /* writing the butterfly processed i0 + fftLen/4 sample */ - /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ - /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ - write_q15x2 (pSi1, __PKHBT( out1, out2, 0 )); - pSi1 += 2 * n1; - - /* Butterfly calculations */ - - /* Read yd (real), xd(imag) input */ - U = read_q15x2 (pSi3); - - /* T = packed(yb-yd, xb-xd) */ - T = __QSUB16(T, U); - -#ifndef ARM_MATH_BIG_ENDIAN - /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ - R = __SHSAX(S, T); - - /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ - S = __SHASX(S, T); - - /* Butterfly process for the i0+fftLen/2 sample */ - out1 = __SMUSD(C1, S) >> 16U; - out2 = __SMUADX(C1, S); -#else - /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ - R = __SHASX(S, T); - - /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */ - S = __SHSAX(S, T); - - /* Butterfly process for the i0+fftLen/2 sample */ - out1 = __SMUADX(S, C1) >> 16U; - out2 = __SMUSD(__QSUB16(0, C1), S); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ - /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ - write_q15x2 (pSi2, __PKHBT( out1, out2, 0 )); - pSi2 += 2 * n1; - - /* Butterfly process for the i0+3fftLen/4 sample */ - -#ifndef ARM_MATH_BIG_ENDIAN - out1 = __SMUSD(C3, R) >> 16U; - out2 = __SMUADX(C3, R); -#else - out1 = __SMUADX(C3, R) >> 16U; - out2 = __SMUSD(__QSUB16(0, C3), R); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ - /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ - write_q15x2 (pSi3, __PKHBT( out1, out2, 0 )); - pSi3 += 2 * n1; - } - } - /* Twiddle coefficients index modifier */ - twidCoefModifier <<= 2U; - } - /* end of middle stage process */ - - /* data is in 10.6(q6) format for the 1024 point */ - /* data is in 8.8(q8) format for the 256 point */ - /* data is in 6.10(q10) format for the 64 point */ - /* data is in 4.12(q12) format for the 16 point */ - - /* Initializations for the last stage */ - j = fftLen >> 2; - - ptr1 = &pSrc16[0]; - - /* start of last stage process */ - - /* Butterfly implementation */ - do - { - /* Read xa (real), ya(imag) input */ - xaya = read_q15x2_ia (&ptr1); - - /* Read xb (real), yb(imag) input */ - xbyb = read_q15x2_ia (&ptr1); - - /* Read xc (real), yc(imag) input */ - xcyc = read_q15x2_ia (&ptr1); - - /* Read xd (real), yd(imag) input */ - xdyd = read_q15x2_ia (&ptr1); - - /* R = packed((ya + yc), (xa + xc)) */ - R = __QADD16(xaya, xcyc); - - /* T = packed((yb + yd), (xb + xd)) */ - T = __QADD16(xbyb, xdyd); - - /* pointer updation for writing */ - ptr1 = ptr1 - 8U; - - - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - write_q15x2_ia (&ptr1, __SHADD16(R, T)); - - /* T = packed((yb + yd), (xb + xd)) */ - T = __QADD16(xbyb, xdyd); - - /* xc' = (xa-xb+xc-xd) */ - /* yc' = (ya-yb+yc-yd) */ - write_q15x2_ia (&ptr1, __SHSUB16(R, T)); - - /* S = packed((ya - yc), (xa - xc)) */ - S = __QSUB16(xaya, xcyc); - - /* Read yd (real), xd(imag) input */ - /* T = packed( (yb - yd), (xb - xd)) */ - U = __QSUB16(xbyb, xdyd); - -#ifndef ARM_MATH_BIG_ENDIAN - /* xb' = (xa+yb-xc-yd) */ - /* yb' = (ya-xb-yc+xd) */ - write_q15x2_ia (&ptr1, __SHASX(S, U)); - - /* xd' = (xa-yb-xc+yd) */ - /* yd' = (ya+xb-yc-xd) */ - write_q15x2_ia (&ptr1, __SHSAX(S, U)); -#else - /* xb' = (xa+yb-xc-yd) */ - /* yb' = (ya-xb-yc+xd) */ - write_q15x2_ia (&ptr1, __SHSAX(S, U)); - - /* xd' = (xa-yb-xc+yd) */ - /* yd' = (ya+xb-yc-xd) */ - write_q15x2_ia (&ptr1, __SHASX(S, U)); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - } while (--j); - - /* end of last stage process */ - - /* output is in 11.5(q5) format for the 1024 point */ - /* output is in 9.7(q7) format for the 256 point */ - /* output is in 7.9(q9) format for the 64 point */ - /* output is in 5.11(q11) format for the 16 point */ - - -#else /* arm_radix4_butterfly_inverse_q15 */ - - q15_t R0, R1, S0, S1, T0, T1, U0, U1; - q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2; - uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; - - /* Total process is divided into three stages */ - - /* process first stage, middle stages, & last stage */ - - /* Initializations for the first stage */ - n2 = fftLen; - n1 = n2; - - /* n2 = fftLen/4 */ - n2 >>= 2U; - - /* Index for twiddle coefficient */ - ic = 0U; - - /* Index for input read and output write */ - i0 = 0U; - - j = n2; - - /* Input is in 1.15(q15) format */ - - /* Start of first stage process */ - do - { - /* Butterfly implementation */ - - /* index calculation for the input as, */ - /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Reading i0, i0+fftLen/2 inputs */ - /* input is down scale by 4 to avoid overflow */ - /* Read ya (real), xa(imag) input */ - T0 = pSrc16[i0 * 2U] >> 2U; - T1 = pSrc16[(i0 * 2U) + 1U] >> 2U; - /* input is down scale by 4 to avoid overflow */ - /* Read yc (real), xc(imag) input */ - S0 = pSrc16[i2 * 2U] >> 2U; - S1 = pSrc16[(i2 * 2U) + 1U] >> 2U; - - /* R0 = (ya + yc), R1 = (xa + xc) */ - R0 = __SSAT(T0 + S0, 16U); - R1 = __SSAT(T1 + S1, 16U); - /* S0 = (ya - yc), S1 = (xa - xc) */ - S0 = __SSAT(T0 - S0, 16U); - S1 = __SSAT(T1 - S1, 16U); - - /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ - /* input is down scale by 4 to avoid overflow */ - /* Read yb (real), xb(imag) input */ - T0 = pSrc16[i1 * 2U] >> 2U; - T1 = pSrc16[(i1 * 2U) + 1U] >> 2U; - /* Read yd (real), xd(imag) input */ - /* input is down scale by 4 to avoid overflow */ - U0 = pSrc16[i3 * 2U] >> 2U; - U1 = pSrc16[(i3 * 2U) + 1U] >> 2U; - - /* T0 = (yb + yd), T1 = (xb + xd) */ - T0 = __SSAT(T0 + U0, 16U); - T1 = __SSAT(T1 + U1, 16U); - - /* writing the butterfly processed i0 sample */ - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); - pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); - - /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */ - R0 = __SSAT(R0 - T0, 16U); - R1 = __SSAT(R1 - T1, 16U); - /* co2 & si2 are read from Coefficient pointer */ - Co2 = pCoef16[2U * ic * 2U]; - Si2 = pCoef16[(2U * ic * 2U) + 1U]; - /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ - out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16U); - /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ - out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16U); - - /* Reading i0+fftLen/4 */ - /* input is down scale by 4 to avoid overflow */ - /* T0 = yb, T1 = xb */ - T0 = pSrc16[i1 * 2U] >> 2U; - T1 = pSrc16[(i1 * 2U) + 1U] >> 2U; - - /* writing the butterfly processed i0 + fftLen/4 sample */ - /* writing output(xc', yc') in little endian format */ - pSrc16[i1 * 2U] = out1; - pSrc16[(i1 * 2U) + 1U] = out2; - - /* Butterfly calculations */ - /* input is down scale by 4 to avoid overflow */ - /* U0 = yd, U1 = xd) */ - U0 = pSrc16[i3 * 2U] >> 2U; - U1 = pSrc16[(i3 * 2U) + 1U] >> 2U; - - /* T0 = yb-yd, T1 = xb-xd) */ - T0 = __SSAT(T0 - U0, 16U); - T1 = __SSAT(T1 - U1, 16U); - /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */ - R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16); - R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16); - /* S = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ - S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16); - S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16); - - /* co1 & si1 are read from Coefficient pointer */ - Co1 = pCoef16[ic * 2U]; - Si1 = pCoef16[(ic * 2U) + 1U]; - /* Butterfly process for the i0+fftLen/2 sample */ - /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ - out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U); - /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ - out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U); - /* writing output(xb', yb') in little endian format */ - pSrc16[i2 * 2U] = out1; - pSrc16[(i2 * 2U) + 1U] = out2; - - /* Co3 & si3 are read from Coefficient pointer */ - Co3 = pCoef16[3U * ic * 2U]; - Si3 = pCoef16[(3U * ic * 2U) + 1U]; - /* Butterfly process for the i0+3fftLen/4 sample */ - /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ - out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U); - /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ - out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U); - /* writing output(xd', yd') in little endian format */ - pSrc16[i3 * 2U] = out1; - pSrc16[(i3 * 2U) + 1U] = out2; - - /* Twiddle coefficients index modifier */ - ic = ic + twidCoefModifier; - - /* Updating input index */ - i0 = i0 + 1U; - - } while (--j); - - /* End of first stage process */ - - /* data is in 4.11(q11) format */ - - - /* Start of Middle stage process */ - - /* Twiddle coefficients index modifier */ - twidCoefModifier <<= 2U; - - /* Calculation of Middle stage */ - for (k = fftLen / 4U; k > 4U; k >>= 2U) - { - /* Initializations for the middle stage */ - n1 = n2; - n2 >>= 2U; - ic = 0U; - - for (j = 0U; j <= (n2 - 1U); j++) - { - /* index calculation for the coefficients */ - Co1 = pCoef16[ic * 2U]; - Si1 = pCoef16[(ic * 2U) + 1U]; - Co2 = pCoef16[2U * ic * 2U]; - Si2 = pCoef16[2U * ic * 2U + 1U]; - Co3 = pCoef16[3U * ic * 2U]; - Si3 = pCoef16[(3U * ic * 2U) + 1U]; - - /* Twiddle coefficients index modifier */ - ic = ic + twidCoefModifier; - - /* Butterfly implementation */ - for (i0 = j; i0 < fftLen; i0 += n1) - { - /* index calculation for the input as, */ - /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Reading i0, i0+fftLen/2 inputs */ - /* Read ya (real), xa(imag) input */ - T0 = pSrc16[i0 * 2U]; - T1 = pSrc16[(i0 * 2U) + 1U]; - - /* Read yc (real), xc(imag) input */ - S0 = pSrc16[i2 * 2U]; - S1 = pSrc16[(i2 * 2U) + 1U]; - - - /* R0 = (ya + yc), R1 = (xa + xc) */ - R0 = __SSAT(T0 + S0, 16U); - R1 = __SSAT(T1 + S1, 16U); - /* S0 = (ya - yc), S1 = (xa - xc) */ - S0 = __SSAT(T0 - S0, 16U); - S1 = __SSAT(T1 - S1, 16U); - - /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ - /* Read yb (real), xb(imag) input */ - T0 = pSrc16[i1 * 2U]; - T1 = pSrc16[(i1 * 2U) + 1U]; - - /* Read yd (real), xd(imag) input */ - U0 = pSrc16[i3 * 2U]; - U1 = pSrc16[(i3 * 2U) + 1U]; - - /* T0 = (yb + yd), T1 = (xb + xd) */ - T0 = __SSAT(T0 + U0, 16U); - T1 = __SSAT(T1 + U1, 16U); - - /* writing the butterfly processed i0 sample */ - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - pSrc16[i0 * 2U] = ((R0 >> 1U) + (T0 >> 1U)) >> 1U; - pSrc16[(i0 * 2U) + 1U] = ((R1 >> 1U) + (T1 >> 1U)) >> 1U; - - /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ - R0 = (R0 >> 1U) - (T0 >> 1U); - R1 = (R1 >> 1U) - (T1 >> 1U); - - /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */ - out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16); - /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ - out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16); - - /* Reading i0+3fftLen/4 */ - /* Read yb (real), xb(imag) input */ - T0 = pSrc16[i1 * 2U]; - T1 = pSrc16[(i1 * 2U) + 1U]; - - /* writing the butterfly processed i0 + fftLen/4 sample */ - /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ - /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ - pSrc16[i1 * 2U] = out1; - pSrc16[(i1 * 2U) + 1U] = out2; - - /* Butterfly calculations */ - /* Read yd (real), xd(imag) input */ - U0 = pSrc16[i3 * 2U]; - U1 = pSrc16[(i3 * 2U) + 1U]; - - /* T0 = yb-yd, T1 = xb-xd) */ - T0 = __SSAT(T0 - U0, 16U); - T1 = __SSAT(T1 - U1, 16U); - - /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */ - R0 = (S0 >> 1U) + (T1 >> 1U); - R1 = (S1 >> 1U) - (T0 >> 1U); - - /* S1 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */ - S0 = (S0 >> 1U) - (T1 >> 1U); - S1 = (S1 >> 1U) + (T0 >> 1U); - - /* Butterfly process for the i0+fftLen/2 sample */ - out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U); - out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U); - /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ - /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ - pSrc16[i2 * 2U] = out1; - pSrc16[(i2 * 2U) + 1U] = out2; - - /* Butterfly process for the i0+3fftLen/4 sample */ - out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U); - - out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U); - /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */ - /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */ - pSrc16[i3 * 2U] = out1; - pSrc16[(i3 * 2U) + 1U] = out2; - - - } - } - /* Twiddle coefficients index modifier */ - twidCoefModifier <<= 2U; - } - /* End of Middle stages process */ - - - /* data is in 10.6(q6) format for the 1024 point */ - /* data is in 8.8(q8) format for the 256 point */ - /* data is in 6.10(q10) format for the 64 point */ - /* data is in 4.12(q12) format for the 16 point */ - - /* start of last stage process */ - - - /* Initializations for the last stage */ - n1 = n2; - n2 >>= 2U; - - /* Butterfly implementation */ - for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1) - { - /* index calculation for the input as, */ - /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Reading i0, i0+fftLen/2 inputs */ - /* Read ya (real), xa(imag) input */ - T0 = pSrc16[i0 * 2U]; - T1 = pSrc16[(i0 * 2U) + 1U]; - /* Read yc (real), xc(imag) input */ - S0 = pSrc16[i2 * 2U]; - S1 = pSrc16[(i2 * 2U) + 1U]; - - /* R0 = (ya + yc), R1 = (xa + xc) */ - R0 = __SSAT(T0 + S0, 16U); - R1 = __SSAT(T1 + S1, 16U); - /* S0 = (ya - yc), S1 = (xa - xc) */ - S0 = __SSAT(T0 - S0, 16U); - S1 = __SSAT(T1 - S1, 16U); - - /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ - /* Read yb (real), xb(imag) input */ - T0 = pSrc16[i1 * 2U]; - T1 = pSrc16[(i1 * 2U) + 1U]; - /* Read yd (real), xd(imag) input */ - U0 = pSrc16[i3 * 2U]; - U1 = pSrc16[(i3 * 2U) + 1U]; - - /* T0 = (yb + yd), T1 = (xb + xd) */ - T0 = __SSAT(T0 + U0, 16U); - T1 = __SSAT(T1 + U1, 16U); - - /* writing the butterfly processed i0 sample */ - /* xa' = xa + xb + xc + xd */ - /* ya' = ya + yb + yc + yd */ - pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U); - pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U); - - /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */ - R0 = (R0 >> 1U) - (T0 >> 1U); - R1 = (R1 >> 1U) - (T1 >> 1U); - - /* Read yb (real), xb(imag) input */ - T0 = pSrc16[i1 * 2U]; - T1 = pSrc16[(i1 * 2U) + 1U]; - - /* writing the butterfly processed i0 + fftLen/4 sample */ - /* xc' = (xa-xb+xc-xd) */ - /* yc' = (ya-yb+yc-yd) */ - pSrc16[i1 * 2U] = R0; - pSrc16[(i1 * 2U) + 1U] = R1; - - /* Read yd (real), xd(imag) input */ - U0 = pSrc16[i3 * 2U]; - U1 = pSrc16[(i3 * 2U) + 1U]; - /* T0 = (yb - yd), T1 = (xb - xd) */ - T0 = __SSAT(T0 - U0, 16U); - T1 = __SSAT(T1 - U1, 16U); - - /* writing the butterfly processed i0 + fftLen/2 sample */ - /* xb' = (xa-yb-xc+yd) */ - /* yb' = (ya+xb-yc-xd) */ - pSrc16[i2 * 2U] = (S0 >> 1U) - (T1 >> 1U); - pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U); - - - /* writing the butterfly processed i0 + 3fftLen/4 sample */ - /* xd' = (xa+yb-xc-yd) */ - /* yd' = (ya-xb-yc+xd) */ - pSrc16[i3 * 2U] = (S0 >> 1U) + (T1 >> 1U); - pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U); - } - /* end of last stage process */ - - /* output is in 11.5(q5) format for the 1024 point */ - /* output is in 9.7(q7) format for the 256 point */ - /* output is in 7.9(q9) format for the 64 point */ - /* output is in 5.11(q11) format for the 16 point */ - -#endif /* #if defined (ARM_MATH_DSP) */ - -} +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix4_q15.c
+ * Description: This file has function definition of Radix-4 FFT & IFFT function and
+ * In-place bit reversal using bit reversal table
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+
+void arm_radix4_butterfly_q15(
+ q15_t * pSrc16,
+ uint32_t fftLen,
+ const q15_t * pCoef16,
+ uint32_t twidCoefModifier);
+
+void arm_radix4_butterfly_inverse_q15(
+ q15_t * pSrc16,
+ uint32_t fftLen,
+ const q15_t * pCoef16,
+ uint32_t twidCoefModifier);
+
+void arm_bitreversal_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ uint16_t bitRevFactor,
+ const uint16_t * pBitRevTab);
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+
+/**
+ @brief Processing function for the Q15 CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
+ @param[in] S points to an instance of the Q15 CFFT/CIFFT structure.
+ @param[in,out] pSrc points to the complex data buffer. Processing occurs in-place.
+ @return none
+
+ @par Input and output formats:
+ Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
+ Hence the output format is different for different FFT sizes.
+ The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
+ @par
+ \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
+ \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
+ */
+
+void arm_cfft_radix4_q15(
+ const arm_cfft_radix4_instance_q15 * S,
+ q15_t * pSrc)
+{
+ if (S->ifftFlag == 1U)
+ {
+ /* Complex IFFT radix-4 */
+ arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
+ }
+ else
+ {
+ /* Complex FFT radix-4 */
+ arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
+ }
+
+ if (S->bitReverseFlag == 1U)
+ {
+ /* Bit Reversal */
+ arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+ }
+
+}
+
+/**
+ @} end of ComplexFFT group
+ */
+
+/*
+ * Radix-4 FFT algorithm used is :
+ *
+ * Input real and imaginary data:
+ * x(n) = xa + j * ya
+ * x(n+N/4 ) = xb + j * yb
+ * x(n+N/2 ) = xc + j * yc
+ * x(n+3N 4) = xd + j * yd
+ *
+ *
+ * Output real and imaginary data:
+ * x(4r) = xa'+ j * ya'
+ * x(4r+1) = xb'+ j * yb'
+ * x(4r+2) = xc'+ j * yc'
+ * x(4r+3) = xd'+ j * yd'
+ *
+ *
+ * Twiddle factors for radix-4 FFT:
+ * Wn = co1 + j * (- si1)
+ * W2n = co2 + j * (- si2)
+ * W3n = co3 + j * (- si3)
+
+ * The real and imaginary output values for the radix-4 butterfly are
+ * xa' = xa + xb + xc + xd
+ * ya' = ya + yb + yc + yd
+ * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
+ * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
+ * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
+ * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
+ * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
+ * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
+ *
+ */
+
+/**
+ @brief Core function for the Q15 CFFT butterfly process.
+ @param[in,out] pSrc16 points to the in-place buffer of Q15 data type
+ @param[in] fftLen length of the FFT
+ @param[in] pCoef16 points to twiddle coefficient buffer
+ @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+ @return none
+ */
+
+void arm_radix4_butterfly_q15(
+ q15_t * pSrc16,
+ uint32_t fftLen,
+ const q15_t * pCoef16,
+ uint32_t twidCoefModifier)
+{
+
+#if defined (ARM_MATH_DSP)
+
+ q31_t R, S, T, U;
+ q31_t C1, C2, C3, out1, out2;
+ uint32_t n1, n2, ic, i0, j, k;
+
+ q15_t *ptr1;
+ q15_t *pSi0;
+ q15_t *pSi1;
+ q15_t *pSi2;
+ q15_t *pSi3;
+
+ q31_t xaya, xbyb, xcyc, xdyd;
+
+ /* Total process is divided into three stages */
+
+ /* process first stage, middle stages, & last stage */
+
+ /* Initializations for the first stage */
+ n2 = fftLen;
+ n1 = n2;
+
+ /* n2 = fftLen/4 */
+ n2 >>= 2U;
+
+ /* Index for twiddle coefficient */
+ ic = 0U;
+
+ /* Index for input read and output write */
+ j = n2;
+
+ pSi0 = pSrc16;
+ pSi1 = pSi0 + 2 * n2;
+ pSi2 = pSi1 + 2 * n2;
+ pSi3 = pSi2 + 2 * n2;
+
+ /* Input is in 1.15(q15) format */
+
+ /* start of first stage process */
+ do
+ {
+ /* Butterfly implementation */
+
+ /* Reading i0, i0+fftLen/2 inputs */
+ /* Read ya (real), xa(imag) input */
+ T = read_q15x2 (pSi0);
+ T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
+ T = __SHADD16(T, 0); /* it turns out doing this twice is 2 cycles, the alternative takes 3 cycles */
+/*
+ in = ((int16_t) (T & 0xFFFF)) >> 2; // alternative code that takes 3 cycles
+ T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
+*/
+
+ /* Read yc (real), xc(imag) input */
+ S = read_q15x2 (pSi2);
+ S = __SHADD16(S, 0);
+ S = __SHADD16(S, 0);
+
+ /* R = packed((ya + yc), (xa + xc) ) */
+ R = __QADD16(T, S);
+
+ /* S = packed((ya - yc), (xa - xc) ) */
+ S = __QSUB16(T, S);
+
+ /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
+ /* Read yb (real), xb(imag) input */
+ T = read_q15x2 (pSi1);
+ T = __SHADD16(T, 0);
+ T = __SHADD16(T, 0);
+
+ /* Read yd (real), xd(imag) input */
+ U = read_q15x2 (pSi3);
+ U = __SHADD16(U, 0);
+ U = __SHADD16(U, 0);
+
+ /* T = packed((yb + yd), (xb + xd) ) */
+ T = __QADD16(T, U);
+
+ /* writing the butterfly processed i0 sample */
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ write_q15x2_ia (&pSi0, __SHADD16(R, T));
+
+ /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
+ R = __QSUB16(R, T);
+
+ /* co2 & si2 are read from SIMD Coefficient pointer */
+ C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
+ out1 = __SMUAD(C2, R) >> 16U;
+ /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ out2 = __SMUSDX(C2, R);
+#else
+ /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ out1 = __SMUSDX(R, C2) >> 16U;
+ /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
+ out2 = __SMUAD(C2, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* Reading i0+fftLen/4 */
+ /* T = packed(yb, xb) */
+ T = read_q15x2 (pSi1);
+ T = __SHADD16(T, 0);
+ T = __SHADD16(T, 0);
+
+ /* writing the butterfly processed i0 + fftLen/4 sample */
+ /* writing output(xc', yc') in little endian format */
+ write_q15x2_ia (&pSi1, (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ /* Butterfly calculations */
+ /* U = packed(yd, xd) */
+ U = read_q15x2 (pSi3);
+ U = __SHADD16(U, 0);
+ U = __SHADD16(U, 0);
+
+ /* T = packed(yb-yd, xb-xd) */
+ T = __QSUB16(T, U);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
+ R = __QASX(S, T);
+ /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
+ S = __QSAX(S, T);
+#else
+ /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
+ R = __QSAX(S, T);
+ /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
+ S = __QASX(S, T);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* co1 & si1 are read from SIMD Coefficient pointer */
+ C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
+ /* Butterfly process for the i0+fftLen/2 sample */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
+ out1 = __SMUAD(C1, S) >> 16U;
+ /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
+ out2 = __SMUSDX(C1, S);
+#else
+ /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
+ out1 = __SMUSDX(S, C1) >> 16U;
+ /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
+ out2 = __SMUAD(C1, S);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* writing output(xb', yb') in little endian format */
+ write_q15x2_ia (&pSi2, ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF));
+
+ /* co3 & si3 are read from SIMD Coefficient pointer */
+ C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
+ /* Butterfly process for the i0+3fftLen/4 sample */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
+ out1 = __SMUAD(C3, R) >> 16U;
+ /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
+ out2 = __SMUSDX(C3, R);
+#else
+ /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
+ out1 = __SMUSDX(R, C3) >> 16U;
+ /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
+ out2 = __SMUAD(C3, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* writing output(xd', yd') in little endian format */
+ write_q15x2_ia (&pSi3, ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ /* Twiddle coefficients index modifier */
+ ic = ic + twidCoefModifier;
+
+ } while (--j);
+ /* data is in 4.11(q11) format */
+
+ /* end of first stage process */
+
+
+ /* start of middle stage process */
+
+ /* Twiddle coefficients index modifier */
+ twidCoefModifier <<= 2U;
+
+ /* Calculation of Middle stage */
+ for (k = fftLen / 4U; k > 4U; k >>= 2U)
+ {
+ /* Initializations for the middle stage */
+ n1 = n2;
+ n2 >>= 2U;
+ ic = 0U;
+
+ for (j = 0U; j <= (n2 - 1U); j++)
+ {
+ /* index calculation for the coefficients */
+ C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
+ C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
+ C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
+
+ /* Twiddle coefficients index modifier */
+ ic = ic + twidCoefModifier;
+
+ pSi0 = pSrc16 + 2 * j;
+ pSi1 = pSi0 + 2 * n2;
+ pSi2 = pSi1 + 2 * n2;
+ pSi3 = pSi2 + 2 * n2;
+
+ /* Butterfly implementation */
+ for (i0 = j; i0 < fftLen; i0 += n1)
+ {
+ /* Reading i0, i0+fftLen/2 inputs */
+ /* Read ya (real), xa(imag) input */
+ T = read_q15x2 (pSi0);
+
+ /* Read yc (real), xc(imag) input */
+ S = read_q15x2 (pSi2);
+
+ /* R = packed( (ya + yc), (xa + xc)) */
+ R = __QADD16(T, S);
+
+ /* S = packed((ya - yc), (xa - xc)) */
+ S = __QSUB16(T, S);
+
+ /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
+ /* Read yb (real), xb(imag) input */
+ T = read_q15x2 (pSi1);
+
+ /* Read yd (real), xd(imag) input */
+ U = read_q15x2 (pSi3);
+
+ /* T = packed( (yb + yd), (xb + xd)) */
+ T = __QADD16(T, U);
+
+ /* writing the butterfly processed i0 sample */
+
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ out1 = __SHADD16(R, T);
+ out1 = __SHADD16(out1, 0);
+ write_q15x2 (pSi0, out1);
+ pSi0 += 2 * n1;
+
+ /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
+ R = __SHSUB16(R, T);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
+ out1 = __SMUAD(C2, R) >> 16U;
+
+ /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ out2 = __SMUSDX(C2, R);
+#else
+ /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ out1 = __SMUSDX(R, C2) >> 16U;
+
+ /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
+ out2 = __SMUAD(C2, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* Reading i0+3fftLen/4 */
+ /* Read yb (real), xb(imag) input */
+ T = read_q15x2 (pSi1);
+
+ /* writing the butterfly processed i0 + fftLen/4 sample */
+ /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
+ /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ write_q15x2 (pSi1, ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+ pSi1 += 2 * n1;
+
+ /* Butterfly calculations */
+
+ /* Read yd (real), xd(imag) input */
+ U = read_q15x2 (pSi3);
+
+ /* T = packed(yb-yd, xb-xd) */
+ T = __QSUB16(T, U);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
+ R = __SHASX(S, T);
+
+ /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
+ S = __SHSAX(S, T);
+
+
+ /* Butterfly process for the i0+fftLen/2 sample */
+ out1 = __SMUAD(C1, S) >> 16U;
+ out2 = __SMUSDX(C1, S);
+#else
+ /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
+ R = __SHSAX(S, T);
+
+ /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
+ S = __SHASX(S, T);
+
+
+ /* Butterfly process for the i0+fftLen/2 sample */
+ out1 = __SMUSDX(S, C1) >> 16U;
+ out2 = __SMUAD(C1, S);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
+ /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
+ write_q15x2 (pSi2, ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+ pSi2 += 2 * n1;
+
+ /* Butterfly process for the i0+3fftLen/4 sample */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUAD(C3, R) >> 16U;
+ out2 = __SMUSDX(C3, R);
+#else
+ out1 = __SMUSDX(R, C3) >> 16U;
+ out2 = __SMUAD(C3, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
+ /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
+ write_q15x2 (pSi3, ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+ pSi3 += 2 * n1;
+ }
+ }
+ /* Twiddle coefficients index modifier */
+ twidCoefModifier <<= 2U;
+ }
+ /* end of middle stage process */
+
+
+ /* data is in 10.6(q6) format for the 1024 point */
+ /* data is in 8.8(q8) format for the 256 point */
+ /* data is in 6.10(q10) format for the 64 point */
+ /* data is in 4.12(q12) format for the 16 point */
+
+ /* Initializations for the last stage */
+ j = fftLen >> 2;
+
+ ptr1 = &pSrc16[0];
+
+ /* start of last stage process */
+
+ /* Butterfly implementation */
+ do
+ {
+ /* Read xa (real), ya(imag) input */
+ xaya = read_q15x2_ia ((q15_t **) &ptr1);
+
+ /* Read xb (real), yb(imag) input */
+ xbyb = read_q15x2_ia ((q15_t **) &ptr1);
+
+ /* Read xc (real), yc(imag) input */
+ xcyc = read_q15x2_ia ((q15_t **) &ptr1);
+
+ /* Read xd (real), yd(imag) input */
+ xdyd = read_q15x2_ia ((q15_t **) &ptr1);
+
+ /* R = packed((ya + yc), (xa + xc)) */
+ R = __QADD16(xaya, xcyc);
+
+ /* T = packed((yb + yd), (xb + xd)) */
+ T = __QADD16(xbyb, xdyd);
+
+ /* pointer updation for writing */
+ ptr1 = ptr1 - 8U;
+
+
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ write_q15x2_ia (&ptr1, __SHADD16(R, T));
+
+ /* T = packed((yb + yd), (xb + xd)) */
+ T = __QADD16(xbyb, xdyd);
+
+ /* xc' = (xa-xb+xc-xd) */
+ /* yc' = (ya-yb+yc-yd) */
+ write_q15x2_ia (&ptr1, __SHSUB16(R, T));
+
+ /* S = packed((ya - yc), (xa - xc)) */
+ S = __QSUB16(xaya, xcyc);
+
+ /* Read yd (real), xd(imag) input */
+ /* T = packed( (yb - yd), (xb - xd)) */
+ U = __QSUB16(xbyb, xdyd);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* xb' = (xa+yb-xc-yd) */
+ /* yb' = (ya-xb-yc+xd) */
+ write_q15x2_ia (&ptr1, __SHSAX(S, U));
+
+ /* xd' = (xa-yb-xc+yd) */
+ /* yd' = (ya+xb-yc-xd) */
+ write_q15x2_ia (&ptr1, __SHASX(S, U));
+#else
+ /* xb' = (xa+yb-xc-yd) */
+ /* yb' = (ya-xb-yc+xd) */
+ write_q15x2_ia (&ptr1, __SHASX(S, U));
+
+ /* xd' = (xa-yb-xc+yd) */
+ /* yd' = (ya+xb-yc-xd) */
+ write_q15x2_ia (&ptr1, __SHSAX(S, U));
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ } while (--j);
+
+ /* end of last stage process */
+
+ /* output is in 11.5(q5) format for the 1024 point */
+ /* output is in 9.7(q7) format for the 256 point */
+ /* output is in 7.9(q9) format for the 64 point */
+ /* output is in 5.11(q11) format for the 16 point */
+
+
+#else /* #if defined (ARM_MATH_DSP) */
+
+ q15_t R0, R1, S0, S1, T0, T1, U0, U1;
+ q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
+ uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
+
+ /* Total process is divided into three stages */
+
+ /* process first stage, middle stages, & last stage */
+
+ /* Initializations for the first stage */
+ n2 = fftLen;
+ n1 = n2;
+
+ /* n2 = fftLen/4 */
+ n2 >>= 2U;
+
+ /* Index for twiddle coefficient */
+ ic = 0U;
+
+ /* Index for input read and output write */
+ i0 = 0U;
+ j = n2;
+
+ /* Input is in 1.15(q15) format */
+
+ /* start of first stage process */
+ do
+ {
+ /* Butterfly implementation */
+
+ /* index calculation for the input as, */
+ /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Reading i0, i0+fftLen/2 inputs */
+
+ /* input is down scale by 4 to avoid overflow */
+ /* Read ya (real), xa(imag) input */
+ T0 = pSrc16[i0 * 2U] >> 2U;
+ T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
+
+ /* input is down scale by 4 to avoid overflow */
+ /* Read yc (real), xc(imag) input */
+ S0 = pSrc16[i2 * 2U] >> 2U;
+ S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;
+
+ /* R0 = (ya + yc) */
+ R0 = __SSAT(T0 + S0, 16U);
+ /* R1 = (xa + xc) */
+ R1 = __SSAT(T1 + S1, 16U);
+
+ /* S0 = (ya - yc) */
+ S0 = __SSAT(T0 - S0, 16);
+ /* S1 = (xa - xc) */
+ S1 = __SSAT(T1 - S1, 16);
+
+ /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
+ /* input is down scale by 4 to avoid overflow */
+ /* Read yb (real), xb(imag) input */
+ T0 = pSrc16[i1 * 2U] >> 2U;
+ T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
+
+ /* input is down scale by 4 to avoid overflow */
+ /* Read yd (real), xd(imag) input */
+ U0 = pSrc16[i3 * 2U] >> 2U;
+ U1 = pSrc16[(i3 * 2U) + 1] >> 2U;
+
+ /* T0 = (yb + yd) */
+ T0 = __SSAT(T0 + U0, 16U);
+ /* T1 = (xb + xd) */
+ T1 = __SSAT(T1 + U1, 16U);
+
+ /* writing the butterfly processed i0 sample */
+ /* ya' = ya + yb + yc + yd */
+ /* xa' = xa + xb + xc + xd */
+ pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
+ pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
+
+ /* R0 = (ya + yc) - (yb + yd) */
+ /* R1 = (xa + xc) - (xb + xd) */
+ R0 = __SSAT(R0 - T0, 16U);
+ R1 = __SSAT(R1 - T1, 16U);
+
+ /* co2 & si2 are read from Coefficient pointer */
+ Co2 = pCoef16[2U * ic * 2U];
+ Si2 = pCoef16[(2U * ic * 2U) + 1];
+
+ /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
+ out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
+ /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);
+
+ /* Reading i0+fftLen/4 */
+ /* input is down scale by 4 to avoid overflow */
+ /* T0 = yb, T1 = xb */
+ T0 = pSrc16[i1 * 2U] >> 2;
+ T1 = pSrc16[(i1 * 2U) + 1] >> 2;
+
+ /* writing the butterfly processed i0 + fftLen/4 sample */
+ /* writing output(xc', yc') in little endian format */
+ pSrc16[i1 * 2U] = out1;
+ pSrc16[(i1 * 2U) + 1] = out2;
+
+ /* Butterfly calculations */
+ /* input is down scale by 4 to avoid overflow */
+ /* U0 = yd, U1 = xd */
+ U0 = pSrc16[i3 * 2U] >> 2;
+ U1 = pSrc16[(i3 * 2U) + 1] >> 2;
+ /* T0 = yb-yd */
+ T0 = __SSAT(T0 - U0, 16);
+ /* T1 = xb-xd */
+ T1 = __SSAT(T1 - U1, 16);
+
+ /* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */
+ R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
+ R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
+
+ /* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
+ S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16U);
+ S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16U);
+
+ /* co1 & si1 are read from Coefficient pointer */
+ Co1 = pCoef16[ic * 2U];
+ Si1 = pCoef16[(ic * 2U) + 1];
+ /* Butterfly process for the i0+fftLen/2 sample */
+ /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
+ out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16);
+ /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
+ out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16);
+
+ /* writing output(xb', yb') in little endian format */
+ pSrc16[i2 * 2U] = out1;
+ pSrc16[(i2 * 2U) + 1] = out2;
+
+ /* Co3 & si3 are read from Coefficient pointer */
+ Co3 = pCoef16[3U * (ic * 2U)];
+ Si3 = pCoef16[(3U * (ic * 2U)) + 1];
+ /* Butterfly process for the i0+3fftLen/4 sample */
+ /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
+ out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
+ /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
+ out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
+ /* writing output(xd', yd') in little endian format */
+ pSrc16[i3 * 2U] = out1;
+ pSrc16[(i3 * 2U) + 1] = out2;
+
+ /* Twiddle coefficients index modifier */
+ ic = ic + twidCoefModifier;
+
+ /* Updating input index */
+ i0 = i0 + 1U;
+
+ } while (--j);
+ /* data is in 4.11(q11) format */
+
+ /* end of first stage process */
+
+
+ /* start of middle stage process */
+
+ /* Twiddle coefficients index modifier */
+ twidCoefModifier <<= 2U;
+
+ /* Calculation of Middle stage */
+ for (k = fftLen / 4U; k > 4U; k >>= 2U)
+ {
+ /* Initializations for the middle stage */
+ n1 = n2;
+ n2 >>= 2U;
+ ic = 0U;
+
+ for (j = 0U; j <= (n2 - 1U); j++)
+ {
+ /* index calculation for the coefficients */
+ Co1 = pCoef16[ic * 2U];
+ Si1 = pCoef16[(ic * 2U) + 1U];
+ Co2 = pCoef16[2U * (ic * 2U)];
+ Si2 = pCoef16[(2U * (ic * 2U)) + 1U];
+ Co3 = pCoef16[3U * (ic * 2U)];
+ Si3 = pCoef16[(3U * (ic * 2U)) + 1U];
+
+ /* Twiddle coefficients index modifier */
+ ic = ic + twidCoefModifier;
+
+ /* Butterfly implementation */
+ for (i0 = j; i0 < fftLen; i0 += n1)
+ {
+ /* index calculation for the input as, */
+ /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Reading i0, i0+fftLen/2 inputs */
+ /* Read ya (real), xa(imag) input */
+ T0 = pSrc16[i0 * 2U];
+ T1 = pSrc16[(i0 * 2U) + 1U];
+
+ /* Read yc (real), xc(imag) input */
+ S0 = pSrc16[i2 * 2U];
+ S1 = pSrc16[(i2 * 2U) + 1U];
+
+ /* R0 = (ya + yc), R1 = (xa + xc) */
+ R0 = __SSAT(T0 + S0, 16);
+ R1 = __SSAT(T1 + S1, 16);
+
+ /* S0 = (ya - yc), S1 =(xa - xc) */
+ S0 = __SSAT(T0 - S0, 16);
+ S1 = __SSAT(T1 - S1, 16);
+
+ /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
+ /* Read yb (real), xb(imag) input */
+ T0 = pSrc16[i1 * 2U];
+ T1 = pSrc16[(i1 * 2U) + 1U];
+
+ /* Read yd (real), xd(imag) input */
+ U0 = pSrc16[i3 * 2U];
+ U1 = pSrc16[(i3 * 2U) + 1U];
+
+
+ /* T0 = (yb + yd), T1 = (xb + xd) */
+ T0 = __SSAT(T0 + U0, 16);
+ T1 = __SSAT(T1 + U1, 16);
+
+ /* writing the butterfly processed i0 sample */
+
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ out1 = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
+ out2 = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;
+
+ pSrc16[i0 * 2U] = out1;
+ pSrc16[(2U * i0) + 1U] = out2;
+
+ /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
+ R0 = (R0 >> 1U) - (T0 >> 1U);
+ R1 = (R1 >> 1U) - (T1 >> 1U);
+
+ /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
+ out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
+
+ /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);
+
+ /* Reading i0+3fftLen/4 */
+ /* Read yb (real), xb(imag) input */
+ T0 = pSrc16[i1 * 2U];
+ T1 = pSrc16[(i1 * 2U) + 1U];
+
+ /* writing the butterfly processed i0 + fftLen/4 sample */
+ /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
+ /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ pSrc16[i1 * 2U] = out1;
+ pSrc16[(i1 * 2U) + 1U] = out2;
+
+ /* Butterfly calculations */
+
+ /* Read yd (real), xd(imag) input */
+ U0 = pSrc16[i3 * 2U];
+ U1 = pSrc16[(i3 * 2U) + 1U];
+
+ /* T0 = yb-yd, T1 = xb-xd */
+ T0 = __SSAT(T0 - U0, 16);
+ T1 = __SSAT(T1 - U1, 16);
+
+ /* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
+ R0 = (S0 >> 1U) - (T1 >> 1U);
+ R1 = (S1 >> 1U) + (T0 >> 1U);
+
+ /* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
+ S0 = (S0 >> 1U) + (T1 >> 1U);
+ S1 = (S1 >> 1U) - (T0 >> 1U);
+
+ /* Butterfly process for the i0+fftLen/2 sample */
+ out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16U);
+
+ out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16U);
+
+ /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
+ /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
+ pSrc16[i2 * 2U] = out1;
+ pSrc16[(i2 * 2U) + 1U] = out2;
+
+ /* Butterfly process for the i0+3fftLen/4 sample */
+ out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
+
+ out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
+ /* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
+ /* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
+ pSrc16[i3 * 2U] = out1;
+ pSrc16[(i3 * 2U) + 1U] = out2;
+ }
+ }
+ /* Twiddle coefficients index modifier */
+ twidCoefModifier <<= 2U;
+ }
+ /* end of middle stage process */
+
+
+ /* data is in 10.6(q6) format for the 1024 point */
+ /* data is in 8.8(q8) format for the 256 point */
+ /* data is in 6.10(q10) format for the 64 point */
+ /* data is in 4.12(q12) format for the 16 point */
+
+ /* Initializations for the last stage */
+ n1 = n2;
+ n2 >>= 2U;
+
+ /* start of last stage process */
+
+ /* Butterfly implementation */
+ for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
+ {
+ /* index calculation for the input as, */
+ /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Reading i0, i0+fftLen/2 inputs */
+ /* Read ya (real), xa(imag) input */
+ T0 = pSrc16[i0 * 2U];
+ T1 = pSrc16[(i0 * 2U) + 1U];
+
+ /* Read yc (real), xc(imag) input */
+ S0 = pSrc16[i2 * 2U];
+ S1 = pSrc16[(i2 * 2U) + 1U];
+
+ /* R0 = (ya + yc), R1 = (xa + xc) */
+ R0 = __SSAT(T0 + S0, 16U);
+ R1 = __SSAT(T1 + S1, 16U);
+
+ /* S0 = (ya - yc), S1 = (xa - xc) */
+ S0 = __SSAT(T0 - S0, 16U);
+ S1 = __SSAT(T1 - S1, 16U);
+
+ /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
+ /* Read yb (real), xb(imag) input */
+ T0 = pSrc16[i1 * 2U];
+ T1 = pSrc16[(i1 * 2U) + 1U];
+ /* Read yd (real), xd(imag) input */
+ U0 = pSrc16[i3 * 2U];
+ U1 = pSrc16[(i3 * 2U) + 1U];
+
+ /* T0 = (yb + yd), T1 = (xb + xd)) */
+ T0 = __SSAT(T0 + U0, 16U);
+ T1 = __SSAT(T1 + U1, 16U);
+
+ /* writing the butterfly processed i0 sample */
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
+ pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
+
+ /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
+ R0 = (R0 >> 1U) - (T0 >> 1U);
+ R1 = (R1 >> 1U) - (T1 >> 1U);
+ /* Read yb (real), xb(imag) input */
+ T0 = pSrc16[i1 * 2U];
+ T1 = pSrc16[(i1 * 2U) + 1U];
+
+ /* writing the butterfly processed i0 + fftLen/4 sample */
+ /* xc' = (xa-xb+xc-xd) */
+ /* yc' = (ya-yb+yc-yd) */
+ pSrc16[i1 * 2U] = R0;
+ pSrc16[(i1 * 2U) + 1U] = R1;
+
+ /* Read yd (real), xd(imag) input */
+ U0 = pSrc16[i3 * 2U];
+ U1 = pSrc16[(i3 * 2U) + 1U];
+ /* T0 = (yb - yd), T1 = (xb - xd) */
+ T0 = __SSAT(T0 - U0, 16U);
+ T1 = __SSAT(T1 - U1, 16U);
+
+ /* writing the butterfly processed i0 + fftLen/2 sample */
+ /* xb' = (xa+yb-xc-yd) */
+ /* yb' = (ya-xb-yc+xd) */
+ pSrc16[i2 * 2U] = (S0 >> 1U) + (T1 >> 1U);
+ pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
+
+ /* writing the butterfly processed i0 + 3fftLen/4 sample */
+ /* xd' = (xa-yb-xc+yd) */
+ /* yd' = (ya+xb-yc-xd) */
+ pSrc16[i3 * 2U] = (S0 >> 1U) - (T1 >> 1U);
+ pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);
+
+ }
+
+ /* end of last stage process */
+
+ /* output is in 11.5(q5) format for the 1024 point */
+ /* output is in 9.7(q7) format for the 256 point */
+ /* output is in 7.9(q9) format for the 64 point */
+ /* output is in 5.11(q11) format for the 16 point */
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+
+/**
+ @brief Core function for the Q15 CIFFT butterfly process.
+ @param[in,out] pSrc16 points to the in-place buffer of Q15 data type
+ @param[in] fftLen length of the FFT
+ @param[in] pCoef16 points to twiddle coefficient buffer
+ @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+ @return none
+ */
+
+/*
+ * Radix-4 IFFT algorithm used is :
+ *
+ * CIFFT uses same twiddle coefficients as CFFT function
+ * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
+ *
+ *
+ * IFFT is implemented with following changes in equations from FFT
+ *
+ * Input real and imaginary data:
+ * x(n) = xa + j * ya
+ * x(n+N/4 ) = xb + j * yb
+ * x(n+N/2 ) = xc + j * yc
+ * x(n+3N 4) = xd + j * yd
+ *
+ *
+ * Output real and imaginary data:
+ * x(4r) = xa'+ j * ya'
+ * x(4r+1) = xb'+ j * yb'
+ * x(4r+2) = xc'+ j * yc'
+ * x(4r+3) = xd'+ j * yd'
+ *
+ *
+ * Twiddle factors for radix-4 IFFT:
+ * Wn = co1 + j * (si1)
+ * W2n = co2 + j * (si2)
+ * W3n = co3 + j * (si3)
+
+ * The real and imaginary output values for the radix-4 butterfly are
+ * xa' = xa + xb + xc + xd
+ * ya' = ya + yb + yc + yd
+ * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
+ * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
+ * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
+ * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
+ * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
+ * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
+ *
+ */
+
+void arm_radix4_butterfly_inverse_q15(
+ q15_t * pSrc16,
+ uint32_t fftLen,
+ const q15_t * pCoef16,
+ uint32_t twidCoefModifier)
+{
+
+#if defined (ARM_MATH_DSP)
+
+ q31_t R, S, T, U;
+ q31_t C1, C2, C3, out1, out2;
+ uint32_t n1, n2, ic, i0, j, k;
+
+ q15_t *ptr1;
+ q15_t *pSi0;
+ q15_t *pSi1;
+ q15_t *pSi2;
+ q15_t *pSi3;
+
+ q31_t xaya, xbyb, xcyc, xdyd;
+
+ /* Total process is divided into three stages */
+
+ /* process first stage, middle stages, & last stage */
+
+ /* Initializations for the first stage */
+ n2 = fftLen;
+ n1 = n2;
+
+ /* n2 = fftLen/4 */
+ n2 >>= 2U;
+
+ /* Index for twiddle coefficient */
+ ic = 0U;
+
+ /* Index for input read and output write */
+ j = n2;
+
+ pSi0 = pSrc16;
+ pSi1 = pSi0 + 2 * n2;
+ pSi2 = pSi1 + 2 * n2;
+ pSi3 = pSi2 + 2 * n2;
+
+ /* Input is in 1.15(q15) format */
+
+ /* start of first stage process */
+ do
+ {
+ /* Butterfly implementation */
+
+ /* Reading i0, i0+fftLen/2 inputs */
+ /* Read ya (real), xa(imag) input */
+ T = read_q15x2 (pSi0);
+ T = __SHADD16(T, 0);
+ T = __SHADD16(T, 0);
+
+ /* Read yc (real), xc(imag) input */
+ S = read_q15x2 (pSi2);
+ S = __SHADD16(S, 0);
+ S = __SHADD16(S, 0);
+
+ /* R = packed((ya + yc), (xa + xc) ) */
+ R = __QADD16(T, S);
+
+ /* S = packed((ya - yc), (xa - xc) ) */
+ S = __QSUB16(T, S);
+
+ /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
+ /* Read yb (real), xb(imag) input */
+ T = read_q15x2 (pSi1);
+ T = __SHADD16(T, 0);
+ T = __SHADD16(T, 0);
+
+ /* Read yd (real), xd(imag) input */
+ U = read_q15x2 (pSi3);
+ U = __SHADD16(U, 0);
+ U = __SHADD16(U, 0);
+
+ /* T = packed((yb + yd), (xb + xd) ) */
+ T = __QADD16(T, U);
+
+ /* writing the butterfly processed i0 sample */
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ write_q15x2_ia (&pSi0, __SHADD16(R, T));
+
+ /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
+ R = __QSUB16(R, T);
+
+ /* co2 & si2 are read from SIMD Coefficient pointer */
+ C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
+ out1 = __SMUSD(C2, R) >> 16U;
+ /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ out2 = __SMUADX(C2, R);
+#else
+ /* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ out1 = __SMUADX(C2, R) >> 16U;
+ /* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
+ out2 = __SMUSD(__QSUB16(0, C2), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* Reading i0+fftLen/4 */
+ /* T = packed(yb, xb) */
+ T = read_q15x2 (pSi1);
+ T = __SHADD16(T, 0);
+ T = __SHADD16(T, 0);
+
+ /* writing the butterfly processed i0 + fftLen/4 sample */
+ /* writing output(xc', yc') in little endian format */
+ write_q15x2_ia (&pSi1, (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ /* Butterfly calculations */
+ /* U = packed(yd, xd) */
+ U = read_q15x2 (pSi3);
+ U = __SHADD16(U, 0);
+ U = __SHADD16(U, 0);
+
+ /* T = packed(yb-yd, xb-xd) */
+ T = __QSUB16(T, U);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
+ R = __QSAX(S, T);
+ /* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */
+ S = __QASX(S, T);
+#else
+ /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
+ R = __QASX(S, T);
+ /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
+ S = __QSAX(S, T);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* co1 & si1 are read from SIMD Coefficient pointer */
+ C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
+ /* Butterfly process for the i0+fftLen/2 sample */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
+ out1 = __SMUSD(C1, S) >> 16U;
+ /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
+ out2 = __SMUADX(C1, S);
+#else
+ /* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
+ out1 = __SMUADX(C1, S) >> 16U;
+ /* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
+ out2 = __SMUSD(__QSUB16(0, C1), S);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* writing output(xb', yb') in little endian format */
+ write_q15x2_ia (&pSi2, ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF));
+
+ /* co3 & si3 are read from SIMD Coefficient pointer */
+ C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
+ /* Butterfly process for the i0+3fftLen/4 sample */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
+ out1 = __SMUSD(C3, R) >> 16U;
+ /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
+ out2 = __SMUADX(C3, R);
+#else
+ /* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
+ out1 = __SMUADX(C3, R) >> 16U;
+ /* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
+ out2 = __SMUSD(__QSUB16(0, C3), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* writing output(xd', yd') in little endian format */
+ write_q15x2_ia (&pSi3, ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+
+ /* Twiddle coefficients index modifier */
+ ic = ic + twidCoefModifier;
+
+ } while (--j);
+ /* data is in 4.11(q11) format */
+
+ /* end of first stage process */
+
+
+ /* start of middle stage process */
+
+ /* Twiddle coefficients index modifier */
+ twidCoefModifier <<= 2U;
+
+ /* Calculation of Middle stage */
+ for (k = fftLen / 4U; k > 4U; k >>= 2U)
+ {
+ /* Initializations for the middle stage */
+ n1 = n2;
+ n2 >>= 2U;
+ ic = 0U;
+
+ for (j = 0U; j <= (n2 - 1U); j++)
+ {
+ /* index calculation for the coefficients */
+ C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
+ C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
+ C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
+
+ /* Twiddle coefficients index modifier */
+ ic = ic + twidCoefModifier;
+
+ pSi0 = pSrc16 + 2 * j;
+ pSi1 = pSi0 + 2 * n2;
+ pSi2 = pSi1 + 2 * n2;
+ pSi3 = pSi2 + 2 * n2;
+
+ /* Butterfly implementation */
+ for (i0 = j; i0 < fftLen; i0 += n1)
+ {
+ /* Reading i0, i0+fftLen/2 inputs */
+ /* Read ya (real), xa(imag) input */
+ T = read_q15x2 (pSi0);
+
+ /* Read yc (real), xc(imag) input */
+ S = read_q15x2 (pSi2);
+
+ /* R = packed( (ya + yc), (xa + xc)) */
+ R = __QADD16(T, S);
+
+ /* S = packed((ya - yc), (xa - xc)) */
+ S = __QSUB16(T, S);
+
+ /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
+ /* Read yb (real), xb(imag) input */
+ T = read_q15x2 (pSi1);
+
+ /* Read yd (real), xd(imag) input */
+ U = read_q15x2 (pSi3);
+
+ /* T = packed( (yb + yd), (xb + xd)) */
+ T = __QADD16(T, U);
+
+ /* writing the butterfly processed i0 sample */
+
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ out1 = __SHADD16(R, T);
+ out1 = __SHADD16(out1, 0);
+ write_q15x2 (pSi0, out1);
+ pSi0 += 2 * n1;
+
+ /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
+ R = __SHSUB16(R, T);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
+ out1 = __SMUSD(C2, R) >> 16U;
+
+ /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ out2 = __SMUADX(C2, R);
+#else
+ /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ out1 = __SMUADX(R, C2) >> 16U;
+
+ /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
+ out2 = __SMUSD(__QSUB16(0, C2), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* Reading i0+3fftLen/4 */
+ /* Read yb (real), xb(imag) input */
+ T = read_q15x2 (pSi1);
+
+ /* writing the butterfly processed i0 + fftLen/4 sample */
+ /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
+ /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
+ write_q15x2 (pSi1, ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+ pSi1 += 2 * n1;
+
+ /* Butterfly calculations */
+
+ /* Read yd (real), xd(imag) input */
+ U = read_q15x2 (pSi3);
+
+ /* T = packed(yb-yd, xb-xd) */
+ T = __QSUB16(T, U);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
+ R = __SHSAX(S, T);
+
+ /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
+ S = __SHASX(S, T);
+
+ /* Butterfly process for the i0+fftLen/2 sample */
+ out1 = __SMUSD(C1, S) >> 16U;
+ out2 = __SMUADX(C1, S);
+#else
+ /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
+ R = __SHASX(S, T);
+
+ /* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
+ S = __SHSAX(S, T);
+
+ /* Butterfly process for the i0+fftLen/2 sample */
+ out1 = __SMUADX(S, C1) >> 16U;
+ out2 = __SMUSD(__QSUB16(0, C1), S);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
+ /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
+ write_q15x2 (pSi2, ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+ pSi2 += 2 * n1;
+
+ /* Butterfly process for the i0+3fftLen/4 sample */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ out1 = __SMUSD(C3, R) >> 16U;
+ out2 = __SMUADX(C3, R);
+#else
+ out1 = __SMUADX(C3, R) >> 16U;
+ out2 = __SMUSD(__QSUB16(0, C3), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
+ /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
+ write_q15x2 (pSi3, ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+ pSi3 += 2 * n1;
+ }
+ }
+ /* Twiddle coefficients index modifier */
+ twidCoefModifier <<= 2U;
+ }
+ /* end of middle stage process */
+
+ /* data is in 10.6(q6) format for the 1024 point */
+ /* data is in 8.8(q8) format for the 256 point */
+ /* data is in 6.10(q10) format for the 64 point */
+ /* data is in 4.12(q12) format for the 16 point */
+
+ /* Initializations for the last stage */
+ j = fftLen >> 2;
+
+ ptr1 = &pSrc16[0];
+
+ /* start of last stage process */
+
+ /* Butterfly implementation */
+ do
+ {
+ /* Read xa (real), ya(imag) input */
+ xaya = read_q15x2_ia ((q15_t **) &ptr1);
+
+ /* Read xb (real), yb(imag) input */
+ xbyb = read_q15x2_ia ((q15_t **) &ptr1);
+
+ /* Read xc (real), yc(imag) input */
+ xcyc = read_q15x2_ia ((q15_t **) &ptr1);
+
+ /* Read xd (real), yd(imag) input */
+ xdyd = read_q15x2_ia ((q15_t **) &ptr1);
+
+ /* R = packed((ya + yc), (xa + xc)) */
+ R = __QADD16(xaya, xcyc);
+
+ /* T = packed((yb + yd), (xb + xd)) */
+ T = __QADD16(xbyb, xdyd);
+
+ /* pointer updation for writing */
+ ptr1 = ptr1 - 8U;
+
+
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ write_q15x2_ia (&ptr1, __SHADD16(R, T));
+
+ /* T = packed((yb + yd), (xb + xd)) */
+ T = __QADD16(xbyb, xdyd);
+
+ /* xc' = (xa-xb+xc-xd) */
+ /* yc' = (ya-yb+yc-yd) */
+ write_q15x2_ia (&ptr1, __SHSUB16(R, T));
+
+ /* S = packed((ya - yc), (xa - xc)) */
+ S = __QSUB16(xaya, xcyc);
+
+ /* Read yd (real), xd(imag) input */
+ /* T = packed( (yb - yd), (xb - xd)) */
+ U = __QSUB16(xbyb, xdyd);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* xb' = (xa+yb-xc-yd) */
+ /* yb' = (ya-xb-yc+xd) */
+ write_q15x2_ia (&ptr1, __SHASX(S, U));
+
+ /* xd' = (xa-yb-xc+yd) */
+ /* yd' = (ya+xb-yc-xd) */
+ write_q15x2_ia (&ptr1, __SHSAX(S, U));
+#else
+ /* xb' = (xa+yb-xc-yd) */
+ /* yb' = (ya-xb-yc+xd) */
+ write_q15x2_ia (&ptr1, __SHSAX(S, U));
+
+ /* xd' = (xa-yb-xc+yd) */
+ /* yd' = (ya+xb-yc-xd) */
+ write_q15x2_ia (&ptr1, __SHASX(S, U));
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ } while (--j);
+
+ /* end of last stage process */
+
+ /* output is in 11.5(q5) format for the 1024 point */
+ /* output is in 9.7(q7) format for the 256 point */
+ /* output is in 7.9(q9) format for the 64 point */
+ /* output is in 5.11(q11) format for the 16 point */
+
+
+#else /* arm_radix4_butterfly_inverse_q15 */
+
+ q15_t R0, R1, S0, S1, T0, T1, U0, U1;
+ q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
+ uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
+
+ /* Total process is divided into three stages */
+
+ /* process first stage, middle stages, & last stage */
+
+ /* Initializations for the first stage */
+ n2 = fftLen;
+ n1 = n2;
+
+ /* n2 = fftLen/4 */
+ n2 >>= 2U;
+
+ /* Index for twiddle coefficient */
+ ic = 0U;
+
+ /* Index for input read and output write */
+ i0 = 0U;
+
+ j = n2;
+
+ /* Input is in 1.15(q15) format */
+
+ /* Start of first stage process */
+ do
+ {
+ /* Butterfly implementation */
+
+ /* index calculation for the input as, */
+ /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Reading i0, i0+fftLen/2 inputs */
+ /* input is down scale by 4 to avoid overflow */
+ /* Read ya (real), xa(imag) input */
+ T0 = pSrc16[i0 * 2U] >> 2U;
+ T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
+ /* input is down scale by 4 to avoid overflow */
+ /* Read yc (real), xc(imag) input */
+ S0 = pSrc16[i2 * 2U] >> 2U;
+ S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;
+
+ /* R0 = (ya + yc), R1 = (xa + xc) */
+ R0 = __SSAT(T0 + S0, 16U);
+ R1 = __SSAT(T1 + S1, 16U);
+ /* S0 = (ya - yc), S1 = (xa - xc) */
+ S0 = __SSAT(T0 - S0, 16U);
+ S1 = __SSAT(T1 - S1, 16U);
+
+ /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
+ /* input is down scale by 4 to avoid overflow */
+ /* Read yb (real), xb(imag) input */
+ T0 = pSrc16[i1 * 2U] >> 2U;
+ T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
+ /* Read yd (real), xd(imag) input */
+ /* input is down scale by 4 to avoid overflow */
+ U0 = pSrc16[i3 * 2U] >> 2U;
+ U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;
+
+ /* T0 = (yb + yd), T1 = (xb + xd) */
+ T0 = __SSAT(T0 + U0, 16U);
+ T1 = __SSAT(T1 + U1, 16U);
+
+ /* writing the butterfly processed i0 sample */
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
+ pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
+
+ /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */
+ R0 = __SSAT(R0 - T0, 16U);
+ R1 = __SSAT(R1 - T1, 16U);
+ /* co2 & si2 are read from Coefficient pointer */
+ Co2 = pCoef16[2U * ic * 2U];
+ Si2 = pCoef16[(2U * ic * 2U) + 1U];
+ /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
+ out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16U);
+ /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
+ out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16U);
+
+ /* Reading i0+fftLen/4 */
+ /* input is down scale by 4 to avoid overflow */
+ /* T0 = yb, T1 = xb */
+ T0 = pSrc16[i1 * 2U] >> 2U;
+ T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
+
+ /* writing the butterfly processed i0 + fftLen/4 sample */
+ /* writing output(xc', yc') in little endian format */
+ pSrc16[i1 * 2U] = out1;
+ pSrc16[(i1 * 2U) + 1U] = out2;
+
+ /* Butterfly calculations */
+ /* input is down scale by 4 to avoid overflow */
+ /* U0 = yd, U1 = xd) */
+ U0 = pSrc16[i3 * 2U] >> 2U;
+ U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;
+
+ /* T0 = yb-yd, T1 = xb-xd) */
+ T0 = __SSAT(T0 - U0, 16U);
+ T1 = __SSAT(T1 - U1, 16U);
+ /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
+ R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16);
+ R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16);
+ /* S = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
+ S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
+ S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
+
+ /* co1 & si1 are read from Coefficient pointer */
+ Co1 = pCoef16[ic * 2U];
+ Si1 = pCoef16[(ic * 2U) + 1U];
+ /* Butterfly process for the i0+fftLen/2 sample */
+ /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
+ out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
+ /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
+ out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
+ /* writing output(xb', yb') in little endian format */
+ pSrc16[i2 * 2U] = out1;
+ pSrc16[(i2 * 2U) + 1U] = out2;
+
+ /* Co3 & si3 are read from Coefficient pointer */
+ Co3 = pCoef16[3U * ic * 2U];
+ Si3 = pCoef16[(3U * ic * 2U) + 1U];
+ /* Butterfly process for the i0+3fftLen/4 sample */
+ /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
+ out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
+ /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
+ out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
+ /* writing output(xd', yd') in little endian format */
+ pSrc16[i3 * 2U] = out1;
+ pSrc16[(i3 * 2U) + 1U] = out2;
+
+ /* Twiddle coefficients index modifier */
+ ic = ic + twidCoefModifier;
+
+ /* Updating input index */
+ i0 = i0 + 1U;
+
+ } while (--j);
+
+ /* End of first stage process */
+
+ /* data is in 4.11(q11) format */
+
+
+ /* Start of Middle stage process */
+
+ /* Twiddle coefficients index modifier */
+ twidCoefModifier <<= 2U;
+
+ /* Calculation of Middle stage */
+ for (k = fftLen / 4U; k > 4U; k >>= 2U)
+ {
+ /* Initializations for the middle stage */
+ n1 = n2;
+ n2 >>= 2U;
+ ic = 0U;
+
+ for (j = 0U; j <= (n2 - 1U); j++)
+ {
+ /* index calculation for the coefficients */
+ Co1 = pCoef16[ic * 2U];
+ Si1 = pCoef16[(ic * 2U) + 1U];
+ Co2 = pCoef16[2U * ic * 2U];
+ Si2 = pCoef16[2U * ic * 2U + 1U];
+ Co3 = pCoef16[3U * ic * 2U];
+ Si3 = pCoef16[(3U * ic * 2U) + 1U];
+
+ /* Twiddle coefficients index modifier */
+ ic = ic + twidCoefModifier;
+
+ /* Butterfly implementation */
+ for (i0 = j; i0 < fftLen; i0 += n1)
+ {
+ /* index calculation for the input as, */
+ /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Reading i0, i0+fftLen/2 inputs */
+ /* Read ya (real), xa(imag) input */
+ T0 = pSrc16[i0 * 2U];
+ T1 = pSrc16[(i0 * 2U) + 1U];
+
+ /* Read yc (real), xc(imag) input */
+ S0 = pSrc16[i2 * 2U];
+ S1 = pSrc16[(i2 * 2U) + 1U];
+
+
+ /* R0 = (ya + yc), R1 = (xa + xc) */
+ R0 = __SSAT(T0 + S0, 16U);
+ R1 = __SSAT(T1 + S1, 16U);
+ /* S0 = (ya - yc), S1 = (xa - xc) */
+ S0 = __SSAT(T0 - S0, 16U);
+ S1 = __SSAT(T1 - S1, 16U);
+
+ /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
+ /* Read yb (real), xb(imag) input */
+ T0 = pSrc16[i1 * 2U];
+ T1 = pSrc16[(i1 * 2U) + 1U];
+
+ /* Read yd (real), xd(imag) input */
+ U0 = pSrc16[i3 * 2U];
+ U1 = pSrc16[(i3 * 2U) + 1U];
+
+ /* T0 = (yb + yd), T1 = (xb + xd) */
+ T0 = __SSAT(T0 + U0, 16U);
+ T1 = __SSAT(T1 + U1, 16U);
+
+ /* writing the butterfly processed i0 sample */
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ pSrc16[i0 * 2U] = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
+ pSrc16[(i0 * 2U) + 1U] = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;
+
+ /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
+ R0 = (R0 >> 1U) - (T0 >> 1U);
+ R1 = (R1 >> 1U) - (T1 >> 1U);
+
+ /* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
+ out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16);
+ /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
+ out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16);
+
+ /* Reading i0+3fftLen/4 */
+ /* Read yb (real), xb(imag) input */
+ T0 = pSrc16[i1 * 2U];
+ T1 = pSrc16[(i1 * 2U) + 1U];
+
+ /* writing the butterfly processed i0 + fftLen/4 sample */
+ /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
+ /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
+ pSrc16[i1 * 2U] = out1;
+ pSrc16[(i1 * 2U) + 1U] = out2;
+
+ /* Butterfly calculations */
+ /* Read yd (real), xd(imag) input */
+ U0 = pSrc16[i3 * 2U];
+ U1 = pSrc16[(i3 * 2U) + 1U];
+
+ /* T0 = yb-yd, T1 = xb-xd) */
+ T0 = __SSAT(T0 - U0, 16U);
+ T1 = __SSAT(T1 - U1, 16U);
+
+ /* R0 = (ya-yc) - (xb- xd) , R1 = (xa-xc) + (yb-yd) */
+ R0 = (S0 >> 1U) + (T1 >> 1U);
+ R1 = (S1 >> 1U) - (T0 >> 1U);
+
+ /* S1 = (ya-yc) + (xb- xd), S1 = (xa-xc) - (yb-yd) */
+ S0 = (S0 >> 1U) - (T1 >> 1U);
+ S1 = (S1 >> 1U) + (T0 >> 1U);
+
+ /* Butterfly process for the i0+fftLen/2 sample */
+ out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
+ out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
+ /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
+ /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
+ pSrc16[i2 * 2U] = out1;
+ pSrc16[(i2 * 2U) + 1U] = out2;
+
+ /* Butterfly process for the i0+3fftLen/4 sample */
+ out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
+
+ out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
+ /* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
+ /* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
+ pSrc16[i3 * 2U] = out1;
+ pSrc16[(i3 * 2U) + 1U] = out2;
+
+
+ }
+ }
+ /* Twiddle coefficients index modifier */
+ twidCoefModifier <<= 2U;
+ }
+ /* End of Middle stages process */
+
+
+ /* data is in 10.6(q6) format for the 1024 point */
+ /* data is in 8.8(q8) format for the 256 point */
+ /* data is in 6.10(q10) format for the 64 point */
+ /* data is in 4.12(q12) format for the 16 point */
+
+ /* start of last stage process */
+
+
+ /* Initializations for the last stage */
+ n1 = n2;
+ n2 >>= 2U;
+
+ /* Butterfly implementation */
+ for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
+ {
+ /* index calculation for the input as, */
+ /* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Reading i0, i0+fftLen/2 inputs */
+ /* Read ya (real), xa(imag) input */
+ T0 = pSrc16[i0 * 2U];
+ T1 = pSrc16[(i0 * 2U) + 1U];
+ /* Read yc (real), xc(imag) input */
+ S0 = pSrc16[i2 * 2U];
+ S1 = pSrc16[(i2 * 2U) + 1U];
+
+ /* R0 = (ya + yc), R1 = (xa + xc) */
+ R0 = __SSAT(T0 + S0, 16U);
+ R1 = __SSAT(T1 + S1, 16U);
+ /* S0 = (ya - yc), S1 = (xa - xc) */
+ S0 = __SSAT(T0 - S0, 16U);
+ S1 = __SSAT(T1 - S1, 16U);
+
+ /* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
+ /* Read yb (real), xb(imag) input */
+ T0 = pSrc16[i1 * 2U];
+ T1 = pSrc16[(i1 * 2U) + 1U];
+ /* Read yd (real), xd(imag) input */
+ U0 = pSrc16[i3 * 2U];
+ U1 = pSrc16[(i3 * 2U) + 1U];
+
+ /* T0 = (yb + yd), T1 = (xb + xd) */
+ T0 = __SSAT(T0 + U0, 16U);
+ T1 = __SSAT(T1 + U1, 16U);
+
+ /* writing the butterfly processed i0 sample */
+ /* xa' = xa + xb + xc + xd */
+ /* ya' = ya + yb + yc + yd */
+ pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
+ pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
+
+ /* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
+ R0 = (R0 >> 1U) - (T0 >> 1U);
+ R1 = (R1 >> 1U) - (T1 >> 1U);
+
+ /* Read yb (real), xb(imag) input */
+ T0 = pSrc16[i1 * 2U];
+ T1 = pSrc16[(i1 * 2U) + 1U];
+
+ /* writing the butterfly processed i0 + fftLen/4 sample */
+ /* xc' = (xa-xb+xc-xd) */
+ /* yc' = (ya-yb+yc-yd) */
+ pSrc16[i1 * 2U] = R0;
+ pSrc16[(i1 * 2U) + 1U] = R1;
+
+ /* Read yd (real), xd(imag) input */
+ U0 = pSrc16[i3 * 2U];
+ U1 = pSrc16[(i3 * 2U) + 1U];
+ /* T0 = (yb - yd), T1 = (xb - xd) */
+ T0 = __SSAT(T0 - U0, 16U);
+ T1 = __SSAT(T1 - U1, 16U);
+
+ /* writing the butterfly processed i0 + fftLen/2 sample */
+ /* xb' = (xa-yb-xc+yd) */
+ /* yb' = (ya+xb-yc-xd) */
+ pSrc16[i2 * 2U] = (S0 >> 1U) - (T1 >> 1U);
+ pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);
+
+
+ /* writing the butterfly processed i0 + 3fftLen/4 sample */
+ /* xd' = (xa+yb-xc-yd) */
+ /* yd' = (ya-xb-yc+xd) */
+ pSrc16[i3 * 2U] = (S0 >> 1U) + (T1 >> 1U);
+ pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
+ }
+ /* end of last stage process */
+
+ /* output is in 11.5(q5) format for the 1024 point */
+ /* output is in 9.7(q7) format for the 256 point */
+ /* output is in 7.9(q9) format for the 64 point */
+ /* output is in 5.11(q11) format for the 16 point */
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q31.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q31.c index 46c1e47..b7ea7e5 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q31.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q31.c @@ -1,827 +1,827 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix4_q31.c - * Description: This file has function definition of Radix-4 FFT & IFFT function and - * In-place bit reversal using bit reversal table - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -void arm_radix4_butterfly_inverse_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef, - uint32_t twidCoefModifier); - -void arm_radix4_butterfly_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef, - uint32_t twidCoefModifier); - -void arm_bitreversal_q31( - q31_t * pSrc, - uint32_t fftLen, - uint16_t bitRevFactor, - const uint16_t * pBitRevTab); - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup ComplexFFT - @{ - */ - -/** - @brief Processing function for the Q31 CFFT/CIFFT. - @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future. - @param[in] S points to an instance of the Q31 CFFT/CIFFT structure - @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place - @return none - - @par Input and output formats: - Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. - Hence the output format is different for different FFT sizes. - The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: - @par - \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT" - \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT" - */ - -void arm_cfft_radix4_q31( - const arm_cfft_radix4_instance_q31 * S, - q31_t * pSrc) -{ - if (S->ifftFlag == 1U) - { - /* Complex IFFT radix-4 */ - arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); - } - else - { - /* Complex FFT radix-4 */ - arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier); - } - - if (S->bitReverseFlag == 1U) - { - /* Bit Reversal */ - arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); - } - -} - -/** - @} end of ComplexFFT group - */ - -/* - * Radix-4 FFT algorithm used is : - * - * Input real and imaginary data: - * x(n) = xa + j * ya - * x(n+N/4 ) = xb + j * yb - * x(n+N/2 ) = xc + j * yc - * x(n+3N 4) = xd + j * yd - * - * - * Output real and imaginary data: - * x(4r) = xa'+ j * ya' - * x(4r+1) = xb'+ j * yb' - * x(4r+2) = xc'+ j * yc' - * x(4r+3) = xd'+ j * yd' - * - * - * Twiddle factors for radix-4 FFT: - * Wn = co1 + j * (- si1) - * W2n = co2 + j * (- si2) - * W3n = co3 + j * (- si3) - * - * Butterfly implementation: - * xa' = xa + xb + xc + xd - * ya' = ya + yb + yc + yd - * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) - * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) - * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) - * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) - * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) - * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) - * - */ - -/** - @brief Core function for the Q31 CFFT butterfly process. - @param[in,out] pSrc points to the in-place buffer of Q31 data type. - @param[in] fftLen length of the FFT. - @param[in] pCoef points to twiddle coefficient buffer. - @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. - @return none - */ - -void arm_radix4_butterfly_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef, - uint32_t twidCoefModifier) -{ - uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; - q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; - - q31_t xa, xb, xc, xd; - q31_t ya, yb, yc, yd; - q31_t xa_out, xb_out, xc_out, xd_out; - q31_t ya_out, yb_out, yc_out, yd_out; - - q31_t *ptr1; - - /* Total process is divided into three stages */ - - /* process first stage, middle stages, & last stage */ - - - /* start of first stage process */ - - /* Initializations for the first stage */ - n2 = fftLen; - n1 = n2; - /* n2 = fftLen/4 */ - n2 >>= 2U; - i0 = 0U; - ia1 = 0U; - - j = n2; - - /* Calculation of first stage */ - do - { - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* input is in 1.31(q31) format and provide 4 guard bits for the input */ - - /* Butterfly implementation */ - /* xa + xc */ - r1 = (pSrc[(2U * i0)] >> 4U) + (pSrc[(2U * i2)] >> 4U); - /* xa - xc */ - r2 = (pSrc[(2U * i0)] >> 4U) - (pSrc[(2U * i2)] >> 4U); - - /* xb + xd */ - t1 = (pSrc[(2U * i1)] >> 4U) + (pSrc[(2U * i3)] >> 4U); - - /* ya + yc */ - s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U); - /* ya - yc */ - s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U); - - /* xa' = xa + xb + xc + xd */ - pSrc[2U * i0] = (r1 + t1); - /* (xa + xc) - (xb + xd) */ - r1 = r1 - t1; - /* yb + yd */ - t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U); - - /* ya' = ya + yb + yc + yd */ - pSrc[(2U * i0) + 1U] = (s1 + t2); - - /* (ya + yc) - (yb + yd) */ - s1 = s1 - t2; - - /* yb - yd */ - t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U); - /* xb - xd */ - t2 = (pSrc[(2U * i1)] >> 4U) - (pSrc[(2U * i3)] >> 4U); - - /* index calculation for the coefficients */ - ia2 = 2U * ia1; - co2 = pCoef[(ia2 * 2U)]; - si2 = pCoef[(ia2 * 2U) + 1U]; - - /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + - ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U; - - /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ - pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - - ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U; - - /* (xa - xc) + (yb - yd) */ - r1 = r2 + t1; - /* (xa - xc) - (yb - yd) */ - r2 = r2 - t1; - - /* (ya - yc) - (xb - xd) */ - s1 = s2 - t2; - /* (ya - yc) + (xb - xd) */ - s2 = s2 + t2; - - co1 = pCoef[(ia1 * 2U)]; - si1 = pCoef[(ia1 * 2U) + 1U]; - - /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + - ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U; - - /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - - ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U; - - /* index calculation for the coefficients */ - ia3 = 3U * ia1; - co3 = pCoef[(ia3 * 2U)]; - si3 = pCoef[(ia3 * 2U) + 1U]; - - /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ - pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + - ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U; - - /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - - ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U; - - /* Twiddle coefficients index modifier */ - ia1 = ia1 + twidCoefModifier; - - /* Updating input index */ - i0 = i0 + 1U; - - } while (--j); - - /* end of first stage process */ - - /* data is in 5.27(q27) format */ - - - /* start of Middle stages process */ - - - /* each stage in middle stages provides two down scaling of the input */ - - twidCoefModifier <<= 2U; - - - for (k = fftLen / 4U; k > 4U; k >>= 2U) - { - /* Initializations for the first stage */ - n1 = n2; - n2 >>= 2U; - ia1 = 0U; - - /* Calculation of first stage */ - for (j = 0U; j <= (n2 - 1U); j++) - { - /* index calculation for the coefficients */ - ia2 = ia1 + ia1; - ia3 = ia2 + ia1; - co1 = pCoef[(ia1 * 2U)]; - si1 = pCoef[(ia1 * 2U) + 1U]; - co2 = pCoef[(ia2 * 2U)]; - si2 = pCoef[(ia2 * 2U) + 1U]; - co3 = pCoef[(ia3 * 2U)]; - si3 = pCoef[(ia3 * 2U) + 1U]; - /* Twiddle coefficients index modifier */ - ia1 = ia1 + twidCoefModifier; - - for (i0 = j; i0 < fftLen; i0 += n1) - { - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Butterfly implementation */ - /* xa + xc */ - r1 = pSrc[2U * i0] + pSrc[2U * i2]; - /* xa - xc */ - r2 = pSrc[2U * i0] - pSrc[2U * i2]; - - /* ya + yc */ - s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U]; - /* ya - yc */ - s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U]; - - /* xb + xd */ - t1 = pSrc[2U * i1] + pSrc[2U * i3]; - - /* xa' = xa + xb + xc + xd */ - pSrc[2U * i0] = (r1 + t1) >> 2U; - /* xa + xc -(xb + xd) */ - r1 = r1 - t1; - - /* yb + yd */ - t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U]; - /* ya' = ya + yb + yc + yd */ - pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U; - - /* (ya + yc) - (yb + yd) */ - s1 = s1 - t2; - - /* (yb - yd) */ - t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U]; - /* (xb - xd) */ - t2 = pSrc[2U * i1] - pSrc[2U * i3]; - - /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + - ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U; - - /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ - pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - - ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U; - - /* (xa - xc) + (yb - yd) */ - r1 = r2 + t1; - /* (xa - xc) - (yb - yd) */ - r2 = r2 - t1; - - /* (ya - yc) - (xb - xd) */ - s1 = s2 - t2; - /* (ya - yc) + (xb - xd) */ - s2 = s2 + t2; - - /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + - ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U; - - /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - - ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U; - - /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ - pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + - ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U; - - /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - - ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U; - } - } - twidCoefModifier <<= 2U; - } - - /* End of Middle stages process */ - - /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ - /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ - /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ - /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ - - - /* start of Last stage process */ - /* Initializations for the last stage */ - j = fftLen >> 2; - ptr1 = &pSrc[0]; - - /* Calculations of last stage */ - do - { - /* Read xa (real), ya(imag) input */ - xa = *ptr1++; - ya = *ptr1++; - - /* Read xb (real), yb(imag) input */ - xb = *ptr1++; - yb = *ptr1++; - - /* Read xc (real), yc(imag) input */ - xc = *ptr1++; - yc = *ptr1++; - - /* Read xc (real), yc(imag) input */ - xd = *ptr1++; - yd = *ptr1++; - - /* xa' = xa + xb + xc + xd */ - xa_out = xa + xb + xc + xd; - - /* ya' = ya + yb + yc + yd */ - ya_out = ya + yb + yc + yd; - - /* pointer updation for writing */ - ptr1 = ptr1 - 8U; - - /* writing xa' and ya' */ - *ptr1++ = xa_out; - *ptr1++ = ya_out; - - xc_out = (xa - xb + xc - xd); - yc_out = (ya - yb + yc - yd); - - /* writing xc' and yc' */ - *ptr1++ = xc_out; - *ptr1++ = yc_out; - - xb_out = (xa + yb - xc - yd); - yb_out = (ya - xb - yc + xd); - - /* writing xb' and yb' */ - *ptr1++ = xb_out; - *ptr1++ = yb_out; - - xd_out = (xa - yb - xc + yd); - yd_out = (ya + xb - yc - xd); - - /* writing xd' and yd' */ - *ptr1++ = xd_out; - *ptr1++ = yd_out; - - - } while (--j); - - /* output is in 11.21(q21) format for the 1024 point */ - /* output is in 9.23(q23) format for the 256 point */ - /* output is in 7.25(q25) format for the 64 point */ - /* output is in 5.27(q27) format for the 16 point */ - - /* End of last stage process */ - -} - - -/** - @brief Core function for the Q31 CIFFT butterfly process. - @param[in,out] pSrc points to the in-place buffer of Q31 data type. - @param[in] fftLen length of the FFT. - @param[in] pCoef points to twiddle coefficient buffer. - @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. - @return none - */ - -/* - * Radix-4 IFFT algorithm used is : - * - * CIFFT uses same twiddle coefficients as CFFT Function - * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4] - * - * - * IFFT is implemented with following changes in equations from FFT - * - * Input real and imaginary data: - * x(n) = xa + j * ya - * x(n+N/4 ) = xb + j * yb - * x(n+N/2 ) = xc + j * yc - * x(n+3N 4) = xd + j * yd - * - * - * Output real and imaginary data: - * x(4r) = xa'+ j * ya' - * x(4r+1) = xb'+ j * yb' - * x(4r+2) = xc'+ j * yc' - * x(4r+3) = xd'+ j * yd' - * - * - * Twiddle factors for radix-4 IFFT: - * Wn = co1 + j * (si1) - * W2n = co2 + j * (si2) - * W3n = co3 + j * (si3) - - * The real and imaginary output values for the radix-4 butterfly are - * xa' = xa + xb + xc + xd - * ya' = ya + yb + yc + yd - * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) - * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) - * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) - * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) - * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) - * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) - * - */ - -void arm_radix4_butterfly_inverse_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pCoef, - uint32_t twidCoefModifier) -{ - uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; - q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; - q31_t xa, xb, xc, xd; - q31_t ya, yb, yc, yd; - q31_t xa_out, xb_out, xc_out, xd_out; - q31_t ya_out, yb_out, yc_out, yd_out; - - q31_t *ptr1; - - /* input is be 1.31(q31) format for all FFT sizes */ - /* Total process is divided into three stages */ - /* process first stage, middle stages, & last stage */ - - /* Start of first stage process */ - - /* Initializations for the first stage */ - n2 = fftLen; - n1 = n2; - /* n2 = fftLen/4 */ - n2 >>= 2U; - i0 = 0U; - ia1 = 0U; - - j = n2; - - do - { - /* input is in 1.31(q31) format and provide 4 guard bits for the input */ - - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Butterfly implementation */ - /* xa + xc */ - r1 = (pSrc[2U * i0] >> 4U) + (pSrc[2U * i2] >> 4U); - /* xa - xc */ - r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U); - - /* xb + xd */ - t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U); - - /* ya + yc */ - s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U); - /* ya - yc */ - s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U); - - /* xa' = xa + xb + xc + xd */ - pSrc[2U * i0] = (r1 + t1); - /* (xa + xc) - (xb + xd) */ - r1 = r1 - t1; - /* yb + yd */ - t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U); - /* ya' = ya + yb + yc + yd */ - pSrc[(2U * i0) + 1U] = (s1 + t2); - - /* (ya + yc) - (yb + yd) */ - s1 = s1 - t2; - - /* yb - yd */ - t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U); - /* xb - xd */ - t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U); - - /* index calculation for the coefficients */ - ia2 = 2U * ia1; - co2 = pCoef[ia2 * 2U]; - si2 = pCoef[(ia2 * 2U) + 1U]; - - /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) - - ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U; - - /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ - pSrc[2U * i1 + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) + - ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U; - - /* (xa - xc) - (yb - yd) */ - r1 = r2 - t1; - /* (xa - xc) + (yb - yd) */ - r2 = r2 + t1; - - /* (ya - yc) + (xb - xd) */ - s1 = s2 + t2; - /* (ya - yc) - (xb - xd) */ - s2 = s2 - t2; - - co1 = pCoef[ia1 * 2U]; - si1 = pCoef[(ia1 * 2U) + 1U]; - - /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - - ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U; - - /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + - ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U; - - /* index calculation for the coefficients */ - ia3 = 3U * ia1; - co3 = pCoef[ia3 * 2U]; - si3 = pCoef[(ia3 * 2U) + 1U]; - - /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ - pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - - ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U; - - /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + - ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U; - - /* Twiddle coefficients index modifier */ - ia1 = ia1 + twidCoefModifier; - - /* Updating input index */ - i0 = i0 + 1U; - - } while (--j); - - /* data is in 5.27(q27) format */ - /* each stage provides two down scaling of the input */ - - - /* Start of Middle stages process */ - - twidCoefModifier <<= 2U; - - /* Calculation of second stage to excluding last stage */ - for (k = fftLen / 4U; k > 4U; k >>= 2U) - { - /* Initializations for the first stage */ - n1 = n2; - n2 >>= 2U; - ia1 = 0U; - - for (j = 0; j <= (n2 - 1U); j++) - { - /* index calculation for the coefficients */ - ia2 = ia1 + ia1; - ia3 = ia2 + ia1; - co1 = pCoef[(ia1 * 2U)]; - si1 = pCoef[(ia1 * 2U) + 1U]; - co2 = pCoef[(ia2 * 2U)]; - si2 = pCoef[(ia2 * 2U) + 1U]; - co3 = pCoef[(ia3 * 2U)]; - si3 = pCoef[(ia3 * 2U) + 1U]; - /* Twiddle coefficients index modifier */ - ia1 = ia1 + twidCoefModifier; - - for (i0 = j; i0 < fftLen; i0 += n1) - { - /* index calculation for the input as, */ - /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */ - i1 = i0 + n2; - i2 = i1 + n2; - i3 = i2 + n2; - - /* Butterfly implementation */ - /* xa + xc */ - r1 = pSrc[2U * i0] + pSrc[2U * i2]; - /* xa - xc */ - r2 = pSrc[2U * i0] - pSrc[2U * i2]; - - /* ya + yc */ - s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U]; - /* ya - yc */ - s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U]; - - /* xb + xd */ - t1 = pSrc[2U * i1] + pSrc[2U * i3]; - - /* xa' = xa + xb + xc + xd */ - pSrc[2U * i0] = (r1 + t1) >> 2U; - /* xa + xc -(xb + xd) */ - r1 = r1 - t1; - /* yb + yd */ - t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U]; - /* ya' = ya + yb + yc + yd */ - pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U; - - /* (ya + yc) - (yb + yd) */ - s1 = s1 - t2; - - /* (yb - yd) */ - t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U]; - /* (xb - xd) */ - t2 = pSrc[2U * i1] - pSrc[2U * i3]; - - /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ - pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) - - ((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U; - - /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ - pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32U)) + - ((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U; - - /* (xa - xc) - (yb - yd) */ - r1 = r2 - t1; - /* (xa - xc) + (yb - yd) */ - r2 = r2 + t1; - - /* (ya - yc) + (xb - xd) */ - s1 = s2 + t2; - /* (ya - yc) - (xb - xd) */ - s2 = s2 - t2; - - /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ - pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - - ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U; - - /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ - pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + - ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U; - - /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ - pSrc[(2U * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - - ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U; - - /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ - pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + - ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U; - } - } - twidCoefModifier <<= 2U; - } - - /* End of Middle stages process */ - - /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ - /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ - /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ - /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ - - - /* Start of last stage process */ - - - /* Initializations for the last stage */ - j = fftLen >> 2; - ptr1 = &pSrc[0]; - - /* Calculations of last stage */ - do - { - /* Read xa (real), ya(imag) input */ - xa = *ptr1++; - ya = *ptr1++; - - /* Read xb (real), yb(imag) input */ - xb = *ptr1++; - yb = *ptr1++; - - /* Read xc (real), yc(imag) input */ - xc = *ptr1++; - yc = *ptr1++; - - /* Read xc (real), yc(imag) input */ - xd = *ptr1++; - yd = *ptr1++; - - /* xa' = xa + xb + xc + xd */ - xa_out = xa + xb + xc + xd; - - /* ya' = ya + yb + yc + yd */ - ya_out = ya + yb + yc + yd; - - /* pointer updation for writing */ - ptr1 = ptr1 - 8U; - - /* writing xa' and ya' */ - *ptr1++ = xa_out; - *ptr1++ = ya_out; - - xc_out = (xa - xb + xc - xd); - yc_out = (ya - yb + yc - yd); - - /* writing xc' and yc' */ - *ptr1++ = xc_out; - *ptr1++ = yc_out; - - xb_out = (xa - yb - xc + yd); - yb_out = (ya + xb - yc - xd); - - /* writing xb' and yb' */ - *ptr1++ = xb_out; - *ptr1++ = yb_out; - - xd_out = (xa + yb - xc - yd); - yd_out = (ya - xb - yc + xd); - - /* writing xd' and yd' */ - *ptr1++ = xd_out; - *ptr1++ = yd_out; - - } while (--j); - - /* output is in 11.21(q21) format for the 1024 point */ - /* output is in 9.23(q23) format for the 256 point */ - /* output is in 7.25(q25) format for the 64 point */ - /* output is in 5.27(q27) format for the 16 point */ - - /* End of last stage process */ -} +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix4_q31.c
+ * Description: This file has function definition of Radix-4 FFT & IFFT function and
+ * In-place bit reversal using bit reversal table
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+void arm_radix4_butterfly_inverse_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef,
+ uint32_t twidCoefModifier);
+
+void arm_radix4_butterfly_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef,
+ uint32_t twidCoefModifier);
+
+void arm_bitreversal_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ uint16_t bitRevFactor,
+ const uint16_t * pBitRevTab);
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup ComplexFFT
+ @{
+ */
+
+/**
+ @brief Processing function for the Q31 CFFT/CIFFT.
+ @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
+ @param[in] S points to an instance of the Q31 CFFT/CIFFT structure
+ @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+ @return none
+
+ @par Input and output formats:
+ Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
+ Hence the output format is different for different FFT sizes.
+ The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
+ @par
+ \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
+ \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
+ */
+
+void arm_cfft_radix4_q31(
+ const arm_cfft_radix4_instance_q31 * S,
+ q31_t * pSrc)
+{
+ if (S->ifftFlag == 1U)
+ {
+ /* Complex IFFT radix-4 */
+ arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
+ }
+ else
+ {
+ /* Complex FFT radix-4 */
+ arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
+ }
+
+ if (S->bitReverseFlag == 1U)
+ {
+ /* Bit Reversal */
+ arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+ }
+
+}
+
+/**
+ @} end of ComplexFFT group
+ */
+
+/*
+ * Radix-4 FFT algorithm used is :
+ *
+ * Input real and imaginary data:
+ * x(n) = xa + j * ya
+ * x(n+N/4 ) = xb + j * yb
+ * x(n+N/2 ) = xc + j * yc
+ * x(n+3N 4) = xd + j * yd
+ *
+ *
+ * Output real and imaginary data:
+ * x(4r) = xa'+ j * ya'
+ * x(4r+1) = xb'+ j * yb'
+ * x(4r+2) = xc'+ j * yc'
+ * x(4r+3) = xd'+ j * yd'
+ *
+ *
+ * Twiddle factors for radix-4 FFT:
+ * Wn = co1 + j * (- si1)
+ * W2n = co2 + j * (- si2)
+ * W3n = co3 + j * (- si3)
+ *
+ * Butterfly implementation:
+ * xa' = xa + xb + xc + xd
+ * ya' = ya + yb + yc + yd
+ * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
+ * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
+ * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
+ * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
+ * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
+ * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
+ *
+ */
+
+/**
+ @brief Core function for the Q31 CFFT butterfly process.
+ @param[in,out] pSrc points to the in-place buffer of Q31 data type.
+ @param[in] fftLen length of the FFT.
+ @param[in] pCoef points to twiddle coefficient buffer.
+ @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+ @return none
+ */
+
+void arm_radix4_butterfly_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef,
+ uint32_t twidCoefModifier)
+{
+ uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
+ q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
+
+ q31_t xa, xb, xc, xd;
+ q31_t ya, yb, yc, yd;
+ q31_t xa_out, xb_out, xc_out, xd_out;
+ q31_t ya_out, yb_out, yc_out, yd_out;
+
+ q31_t *ptr1;
+
+ /* Total process is divided into three stages */
+
+ /* process first stage, middle stages, & last stage */
+
+
+ /* start of first stage process */
+
+ /* Initializations for the first stage */
+ n2 = fftLen;
+ n1 = n2;
+ /* n2 = fftLen/4 */
+ n2 >>= 2U;
+ i0 = 0U;
+ ia1 = 0U;
+
+ j = n2;
+
+ /* Calculation of first stage */
+ do
+ {
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* input is in 1.31(q31) format and provide 4 guard bits for the input */
+
+ /* Butterfly implementation */
+ /* xa + xc */
+ r1 = (pSrc[(2U * i0)] >> 4U) + (pSrc[(2U * i2)] >> 4U);
+ /* xa - xc */
+ r2 = (pSrc[(2U * i0)] >> 4U) - (pSrc[(2U * i2)] >> 4U);
+
+ /* xb + xd */
+ t1 = (pSrc[(2U * i1)] >> 4U) + (pSrc[(2U * i3)] >> 4U);
+
+ /* ya + yc */
+ s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
+ /* ya - yc */
+ s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
+
+ /* xa' = xa + xb + xc + xd */
+ pSrc[2U * i0] = (r1 + t1);
+ /* (xa + xc) - (xb + xd) */
+ r1 = r1 - t1;
+ /* yb + yd */
+ t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
+
+ /* ya' = ya + yb + yc + yd */
+ pSrc[(2U * i0) + 1U] = (s1 + t2);
+
+ /* (ya + yc) - (yb + yd) */
+ s1 = s1 - t2;
+
+ /* yb - yd */
+ t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
+ /* xb - xd */
+ t2 = (pSrc[(2U * i1)] >> 4U) - (pSrc[(2U * i3)] >> 4U);
+
+ /* index calculation for the coefficients */
+ ia2 = 2U * ia1;
+ co2 = pCoef[(ia2 * 2U)];
+ si2 = pCoef[(ia2 * 2U) + 1U];
+
+ /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
+ ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
+
+ /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
+ pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
+ ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
+
+ /* (xa - xc) + (yb - yd) */
+ r1 = r2 + t1;
+ /* (xa - xc) - (yb - yd) */
+ r2 = r2 - t1;
+
+ /* (ya - yc) - (xb - xd) */
+ s1 = s2 - t2;
+ /* (ya - yc) + (xb - xd) */
+ s2 = s2 + t2;
+
+ co1 = pCoef[(ia1 * 2U)];
+ si1 = pCoef[(ia1 * 2U) + 1U];
+
+ /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
+ ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
+
+ /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
+ ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
+
+ /* index calculation for the coefficients */
+ ia3 = 3U * ia1;
+ co3 = pCoef[(ia3 * 2U)];
+ si3 = pCoef[(ia3 * 2U) + 1U];
+
+ /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
+ pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
+ ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
+
+ /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
+ ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
+
+ /* Twiddle coefficients index modifier */
+ ia1 = ia1 + twidCoefModifier;
+
+ /* Updating input index */
+ i0 = i0 + 1U;
+
+ } while (--j);
+
+ /* end of first stage process */
+
+ /* data is in 5.27(q27) format */
+
+
+ /* start of Middle stages process */
+
+
+ /* each stage in middle stages provides two down scaling of the input */
+
+ twidCoefModifier <<= 2U;
+
+
+ for (k = fftLen / 4U; k > 4U; k >>= 2U)
+ {
+ /* Initializations for the first stage */
+ n1 = n2;
+ n2 >>= 2U;
+ ia1 = 0U;
+
+ /* Calculation of first stage */
+ for (j = 0U; j <= (n2 - 1U); j++)
+ {
+ /* index calculation for the coefficients */
+ ia2 = ia1 + ia1;
+ ia3 = ia2 + ia1;
+ co1 = pCoef[(ia1 * 2U)];
+ si1 = pCoef[(ia1 * 2U) + 1U];
+ co2 = pCoef[(ia2 * 2U)];
+ si2 = pCoef[(ia2 * 2U) + 1U];
+ co3 = pCoef[(ia3 * 2U)];
+ si3 = pCoef[(ia3 * 2U) + 1U];
+ /* Twiddle coefficients index modifier */
+ ia1 = ia1 + twidCoefModifier;
+
+ for (i0 = j; i0 < fftLen; i0 += n1)
+ {
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Butterfly implementation */
+ /* xa + xc */
+ r1 = pSrc[2U * i0] + pSrc[2U * i2];
+ /* xa - xc */
+ r2 = pSrc[2U * i0] - pSrc[2U * i2];
+
+ /* ya + yc */
+ s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+ /* ya - yc */
+ s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+
+ /* xb + xd */
+ t1 = pSrc[2U * i1] + pSrc[2U * i3];
+
+ /* xa' = xa + xb + xc + xd */
+ pSrc[2U * i0] = (r1 + t1) >> 2U;
+ /* xa + xc -(xb + xd) */
+ r1 = r1 - t1;
+
+ /* yb + yd */
+ t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+ /* ya' = ya + yb + yc + yd */
+ pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
+
+ /* (ya + yc) - (yb + yd) */
+ s1 = s1 - t2;
+
+ /* (yb - yd) */
+ t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+ /* (xb - xd) */
+ t2 = pSrc[2U * i1] - pSrc[2U * i3];
+
+ /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
+ ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U;
+
+ /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
+ pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
+ ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U;
+
+ /* (xa - xc) + (yb - yd) */
+ r1 = r2 + t1;
+ /* (xa - xc) - (yb - yd) */
+ r2 = r2 - t1;
+
+ /* (ya - yc) - (xb - xd) */
+ s1 = s2 - t2;
+ /* (ya - yc) + (xb - xd) */
+ s2 = s2 + t2;
+
+ /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
+ ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
+
+ /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
+ ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
+
+ /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
+ pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
+ ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
+
+ /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
+ ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
+ }
+ }
+ twidCoefModifier <<= 2U;
+ }
+
+ /* End of Middle stages process */
+
+ /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
+ /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
+ /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
+ /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
+
+
+ /* start of Last stage process */
+ /* Initializations for the last stage */
+ j = fftLen >> 2;
+ ptr1 = &pSrc[0];
+
+ /* Calculations of last stage */
+ do
+ {
+ /* Read xa (real), ya(imag) input */
+ xa = *ptr1++;
+ ya = *ptr1++;
+
+ /* Read xb (real), yb(imag) input */
+ xb = *ptr1++;
+ yb = *ptr1++;
+
+ /* Read xc (real), yc(imag) input */
+ xc = *ptr1++;
+ yc = *ptr1++;
+
+ /* Read xc (real), yc(imag) input */
+ xd = *ptr1++;
+ yd = *ptr1++;
+
+ /* xa' = xa + xb + xc + xd */
+ xa_out = xa + xb + xc + xd;
+
+ /* ya' = ya + yb + yc + yd */
+ ya_out = ya + yb + yc + yd;
+
+ /* pointer updation for writing */
+ ptr1 = ptr1 - 8U;
+
+ /* writing xa' and ya' */
+ *ptr1++ = xa_out;
+ *ptr1++ = ya_out;
+
+ xc_out = (xa - xb + xc - xd);
+ yc_out = (ya - yb + yc - yd);
+
+ /* writing xc' and yc' */
+ *ptr1++ = xc_out;
+ *ptr1++ = yc_out;
+
+ xb_out = (xa + yb - xc - yd);
+ yb_out = (ya - xb - yc + xd);
+
+ /* writing xb' and yb' */
+ *ptr1++ = xb_out;
+ *ptr1++ = yb_out;
+
+ xd_out = (xa - yb - xc + yd);
+ yd_out = (ya + xb - yc - xd);
+
+ /* writing xd' and yd' */
+ *ptr1++ = xd_out;
+ *ptr1++ = yd_out;
+
+
+ } while (--j);
+
+ /* output is in 11.21(q21) format for the 1024 point */
+ /* output is in 9.23(q23) format for the 256 point */
+ /* output is in 7.25(q25) format for the 64 point */
+ /* output is in 5.27(q27) format for the 16 point */
+
+ /* End of last stage process */
+
+}
+
+
+/**
+ @brief Core function for the Q31 CIFFT butterfly process.
+ @param[in,out] pSrc points to the in-place buffer of Q31 data type.
+ @param[in] fftLen length of the FFT.
+ @param[in] pCoef points to twiddle coefficient buffer.
+ @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+ @return none
+ */
+
+/*
+ * Radix-4 IFFT algorithm used is :
+ *
+ * CIFFT uses same twiddle coefficients as CFFT Function
+ * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
+ *
+ *
+ * IFFT is implemented with following changes in equations from FFT
+ *
+ * Input real and imaginary data:
+ * x(n) = xa + j * ya
+ * x(n+N/4 ) = xb + j * yb
+ * x(n+N/2 ) = xc + j * yc
+ * x(n+3N 4) = xd + j * yd
+ *
+ *
+ * Output real and imaginary data:
+ * x(4r) = xa'+ j * ya'
+ * x(4r+1) = xb'+ j * yb'
+ * x(4r+2) = xc'+ j * yc'
+ * x(4r+3) = xd'+ j * yd'
+ *
+ *
+ * Twiddle factors for radix-4 IFFT:
+ * Wn = co1 + j * (si1)
+ * W2n = co2 + j * (si2)
+ * W3n = co3 + j * (si3)
+
+ * The real and imaginary output values for the radix-4 butterfly are
+ * xa' = xa + xb + xc + xd
+ * ya' = ya + yb + yc + yd
+ * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
+ * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
+ * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
+ * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
+ * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
+ * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
+ *
+ */
+
+void arm_radix4_butterfly_inverse_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pCoef,
+ uint32_t twidCoefModifier)
+{
+ uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
+ q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
+ q31_t xa, xb, xc, xd;
+ q31_t ya, yb, yc, yd;
+ q31_t xa_out, xb_out, xc_out, xd_out;
+ q31_t ya_out, yb_out, yc_out, yd_out;
+
+ q31_t *ptr1;
+
+ /* input is be 1.31(q31) format for all FFT sizes */
+ /* Total process is divided into three stages */
+ /* process first stage, middle stages, & last stage */
+
+ /* Start of first stage process */
+
+ /* Initializations for the first stage */
+ n2 = fftLen;
+ n1 = n2;
+ /* n2 = fftLen/4 */
+ n2 >>= 2U;
+ i0 = 0U;
+ ia1 = 0U;
+
+ j = n2;
+
+ do
+ {
+ /* input is in 1.31(q31) format and provide 4 guard bits for the input */
+
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Butterfly implementation */
+ /* xa + xc */
+ r1 = (pSrc[2U * i0] >> 4U) + (pSrc[2U * i2] >> 4U);
+ /* xa - xc */
+ r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U);
+
+ /* xb + xd */
+ t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U);
+
+ /* ya + yc */
+ s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
+ /* ya - yc */
+ s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
+
+ /* xa' = xa + xb + xc + xd */
+ pSrc[2U * i0] = (r1 + t1);
+ /* (xa + xc) - (xb + xd) */
+ r1 = r1 - t1;
+ /* yb + yd */
+ t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
+ /* ya' = ya + yb + yc + yd */
+ pSrc[(2U * i0) + 1U] = (s1 + t2);
+
+ /* (ya + yc) - (yb + yd) */
+ s1 = s1 - t2;
+
+ /* yb - yd */
+ t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
+ /* xb - xd */
+ t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U);
+
+ /* index calculation for the coefficients */
+ ia2 = 2U * ia1;
+ co2 = pCoef[ia2 * 2U];
+ si2 = pCoef[(ia2 * 2U) + 1U];
+
+ /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
+ ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
+
+ /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+ pSrc[2U * i1 + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
+ ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
+
+ /* (xa - xc) - (yb - yd) */
+ r1 = r2 - t1;
+ /* (xa - xc) + (yb - yd) */
+ r2 = r2 + t1;
+
+ /* (ya - yc) + (xb - xd) */
+ s1 = s2 + t2;
+ /* (ya - yc) - (xb - xd) */
+ s2 = s2 - t2;
+
+ co1 = pCoef[ia1 * 2U];
+ si1 = pCoef[(ia1 * 2U) + 1U];
+
+ /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
+ ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
+
+ /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
+ ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
+
+ /* index calculation for the coefficients */
+ ia3 = 3U * ia1;
+ co3 = pCoef[ia3 * 2U];
+ si3 = pCoef[(ia3 * 2U) + 1U];
+
+ /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+ pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
+ ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
+
+ /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
+ ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
+
+ /* Twiddle coefficients index modifier */
+ ia1 = ia1 + twidCoefModifier;
+
+ /* Updating input index */
+ i0 = i0 + 1U;
+
+ } while (--j);
+
+ /* data is in 5.27(q27) format */
+ /* each stage provides two down scaling of the input */
+
+
+ /* Start of Middle stages process */
+
+ twidCoefModifier <<= 2U;
+
+ /* Calculation of second stage to excluding last stage */
+ for (k = fftLen / 4U; k > 4U; k >>= 2U)
+ {
+ /* Initializations for the first stage */
+ n1 = n2;
+ n2 >>= 2U;
+ ia1 = 0U;
+
+ for (j = 0; j <= (n2 - 1U); j++)
+ {
+ /* index calculation for the coefficients */
+ ia2 = ia1 + ia1;
+ ia3 = ia2 + ia1;
+ co1 = pCoef[(ia1 * 2U)];
+ si1 = pCoef[(ia1 * 2U) + 1U];
+ co2 = pCoef[(ia2 * 2U)];
+ si2 = pCoef[(ia2 * 2U) + 1U];
+ co3 = pCoef[(ia3 * 2U)];
+ si3 = pCoef[(ia3 * 2U) + 1U];
+ /* Twiddle coefficients index modifier */
+ ia1 = ia1 + twidCoefModifier;
+
+ for (i0 = j; i0 < fftLen; i0 += n1)
+ {
+ /* index calculation for the input as, */
+ /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
+ i1 = i0 + n2;
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+
+ /* Butterfly implementation */
+ /* xa + xc */
+ r1 = pSrc[2U * i0] + pSrc[2U * i2];
+ /* xa - xc */
+ r2 = pSrc[2U * i0] - pSrc[2U * i2];
+
+ /* ya + yc */
+ s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
+ /* ya - yc */
+ s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
+
+ /* xb + xd */
+ t1 = pSrc[2U * i1] + pSrc[2U * i3];
+
+ /* xa' = xa + xb + xc + xd */
+ pSrc[2U * i0] = (r1 + t1) >> 2U;
+ /* xa + xc -(xb + xd) */
+ r1 = r1 - t1;
+ /* yb + yd */
+ t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
+ /* ya' = ya + yb + yc + yd */
+ pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
+
+ /* (ya + yc) - (yb + yd) */
+ s1 = s1 - t2;
+
+ /* (yb - yd) */
+ t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
+ /* (xb - xd) */
+ t2 = pSrc[2U * i1] - pSrc[2U * i3];
+
+ /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+ pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) -
+ ((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U;
+
+ /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+ pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32U)) +
+ ((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U;
+
+ /* (xa - xc) - (yb - yd) */
+ r1 = r2 - t1;
+ /* (xa - xc) + (yb - yd) */
+ r2 = r2 + t1;
+
+ /* (ya - yc) + (xb - xd) */
+ s1 = s2 + t2;
+ /* (ya - yc) - (xb - xd) */
+ s2 = s2 - t2;
+
+ /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+ pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
+ ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
+
+ /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+ pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
+ ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
+
+ /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+ pSrc[(2U * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
+ ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
+
+ /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+ pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
+ ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
+ }
+ }
+ twidCoefModifier <<= 2U;
+ }
+
+ /* End of Middle stages process */
+
+ /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
+ /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
+ /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
+ /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
+
+
+ /* Start of last stage process */
+
+
+ /* Initializations for the last stage */
+ j = fftLen >> 2;
+ ptr1 = &pSrc[0];
+
+ /* Calculations of last stage */
+ do
+ {
+ /* Read xa (real), ya(imag) input */
+ xa = *ptr1++;
+ ya = *ptr1++;
+
+ /* Read xb (real), yb(imag) input */
+ xb = *ptr1++;
+ yb = *ptr1++;
+
+ /* Read xc (real), yc(imag) input */
+ xc = *ptr1++;
+ yc = *ptr1++;
+
+ /* Read xc (real), yc(imag) input */
+ xd = *ptr1++;
+ yd = *ptr1++;
+
+ /* xa' = xa + xb + xc + xd */
+ xa_out = xa + xb + xc + xd;
+
+ /* ya' = ya + yb + yc + yd */
+ ya_out = ya + yb + yc + yd;
+
+ /* pointer updation for writing */
+ ptr1 = ptr1 - 8U;
+
+ /* writing xa' and ya' */
+ *ptr1++ = xa_out;
+ *ptr1++ = ya_out;
+
+ xc_out = (xa - xb + xc - xd);
+ yc_out = (ya - yb + yc - yd);
+
+ /* writing xc' and yc' */
+ *ptr1++ = xc_out;
+ *ptr1++ = yc_out;
+
+ xb_out = (xa - yb - xc + yd);
+ yb_out = (ya + xb - yc - xd);
+
+ /* writing xb' and yb' */
+ *ptr1++ = xb_out;
+ *ptr1++ = yb_out;
+
+ xd_out = (xa + yb - xc - yd);
+ yd_out = (ya - xb - yc + xd);
+
+ /* writing xd' and yd' */
+ *ptr1++ = xd_out;
+ *ptr1++ = yd_out;
+
+ } while (--j);
+
+ /* output is in 11.21(q21) format for the 1024 point */
+ /* output is in 9.23(q23) format for the 256 point */
+ /* output is in 7.25(q25) format for the 64 point */
+ /* output is in 5.27(q27) format for the 16 point */
+
+ /* End of last stage process */
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f32.c index 7990d47..ae74977 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix8_f32.c @@ -1,285 +1,285 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_cfft_radix8_f32.c - * Description: Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - - -/* ---------------------------------------------------------------------- - * Internal helper function used by the FFTs - * -------------------------------------------------------------------- */ - -/** - brief Core function for the floating-point CFFT butterfly process. - param[in,out] pSrc points to the in-place buffer of floating-point data type. - param[in] fftLen length of the FFT. - param[in] pCoef points to the twiddle coefficient buffer. - param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. - return none -*/ - -void arm_radix8_butterfly_f32( - float32_t * pSrc, - uint16_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier) -{ - uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7; - uint32_t i1, i2, i3, i4, i5, i6, i7, i8; - uint32_t id; - uint32_t n1, n2, j; - - float32_t r1, r2, r3, r4, r5, r6, r7, r8; - float32_t t1, t2; - float32_t s1, s2, s3, s4, s5, s6, s7, s8; - float32_t p1, p2, p3, p4; - float32_t co2, co3, co4, co5, co6, co7, co8; - float32_t si2, si3, si4, si5, si6, si7, si8; - const float32_t C81 = 0.70710678118f; - - n2 = fftLen; - - do - { - n1 = n2; - n2 = n2 >> 3; - i1 = 0; - - do - { - i2 = i1 + n2; - i3 = i2 + n2; - i4 = i3 + n2; - i5 = i4 + n2; - i6 = i5 + n2; - i7 = i6 + n2; - i8 = i7 + n2; - r1 = pSrc[2 * i1] + pSrc[2 * i5]; - r5 = pSrc[2 * i1] - pSrc[2 * i5]; - r2 = pSrc[2 * i2] + pSrc[2 * i6]; - r6 = pSrc[2 * i2] - pSrc[2 * i6]; - r3 = pSrc[2 * i3] + pSrc[2 * i7]; - r7 = pSrc[2 * i3] - pSrc[2 * i7]; - r4 = pSrc[2 * i4] + pSrc[2 * i8]; - r8 = pSrc[2 * i4] - pSrc[2 * i8]; - t1 = r1 - r3; - r1 = r1 + r3; - r3 = r2 - r4; - r2 = r2 + r4; - pSrc[2 * i1] = r1 + r2; - pSrc[2 * i5] = r1 - r2; - r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1]; - s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1]; - r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1]; - s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1]; - s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1]; - s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1]; - r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1]; - s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1]; - t2 = r1 - s3; - r1 = r1 + s3; - s3 = r2 - r4; - r2 = r2 + r4; - pSrc[2 * i1 + 1] = r1 + r2; - pSrc[2 * i5 + 1] = r1 - r2; - pSrc[2 * i3] = t1 + s3; - pSrc[2 * i7] = t1 - s3; - pSrc[2 * i3 + 1] = t2 - r3; - pSrc[2 * i7 + 1] = t2 + r3; - r1 = (r6 - r8) * C81; - r6 = (r6 + r8) * C81; - r2 = (s6 - s8) * C81; - s6 = (s6 + s8) * C81; - t1 = r5 - r1; - r5 = r5 + r1; - r8 = r7 - r6; - r7 = r7 + r6; - t2 = s5 - r2; - s5 = s5 + r2; - s8 = s7 - s6; - s7 = s7 + s6; - pSrc[2 * i2] = r5 + s7; - pSrc[2 * i8] = r5 - s7; - pSrc[2 * i6] = t1 + s8; - pSrc[2 * i4] = t1 - s8; - pSrc[2 * i2 + 1] = s5 - r7; - pSrc[2 * i8 + 1] = s5 + r7; - pSrc[2 * i6 + 1] = t2 - r8; - pSrc[2 * i4 + 1] = t2 + r8; - - i1 += n1; - } while (i1 < fftLen); - - if (n2 < 8) - break; - - ia1 = 0; - j = 1; - - do - { - /* index calculation for the coefficients */ - id = ia1 + twidCoefModifier; - ia1 = id; - ia2 = ia1 + id; - ia3 = ia2 + id; - ia4 = ia3 + id; - ia5 = ia4 + id; - ia6 = ia5 + id; - ia7 = ia6 + id; - - co2 = pCoef[2 * ia1]; - co3 = pCoef[2 * ia2]; - co4 = pCoef[2 * ia3]; - co5 = pCoef[2 * ia4]; - co6 = pCoef[2 * ia5]; - co7 = pCoef[2 * ia6]; - co8 = pCoef[2 * ia7]; - si2 = pCoef[2 * ia1 + 1]; - si3 = pCoef[2 * ia2 + 1]; - si4 = pCoef[2 * ia3 + 1]; - si5 = pCoef[2 * ia4 + 1]; - si6 = pCoef[2 * ia5 + 1]; - si7 = pCoef[2 * ia6 + 1]; - si8 = pCoef[2 * ia7 + 1]; - - i1 = j; - - do - { - /* index calculation for the input */ - i2 = i1 + n2; - i3 = i2 + n2; - i4 = i3 + n2; - i5 = i4 + n2; - i6 = i5 + n2; - i7 = i6 + n2; - i8 = i7 + n2; - r1 = pSrc[2 * i1] + pSrc[2 * i5]; - r5 = pSrc[2 * i1] - pSrc[2 * i5]; - r2 = pSrc[2 * i2] + pSrc[2 * i6]; - r6 = pSrc[2 * i2] - pSrc[2 * i6]; - r3 = pSrc[2 * i3] + pSrc[2 * i7]; - r7 = pSrc[2 * i3] - pSrc[2 * i7]; - r4 = pSrc[2 * i4] + pSrc[2 * i8]; - r8 = pSrc[2 * i4] - pSrc[2 * i8]; - t1 = r1 - r3; - r1 = r1 + r3; - r3 = r2 - r4; - r2 = r2 + r4; - pSrc[2 * i1] = r1 + r2; - r2 = r1 - r2; - s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1]; - s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1]; - s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1]; - s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1]; - s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1]; - s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1]; - s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1]; - s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1]; - t2 = s1 - s3; - s1 = s1 + s3; - s3 = s2 - s4; - s2 = s2 + s4; - r1 = t1 + s3; - t1 = t1 - s3; - pSrc[2 * i1 + 1] = s1 + s2; - s2 = s1 - s2; - s1 = t2 - r3; - t2 = t2 + r3; - p1 = co5 * r2; - p2 = si5 * s2; - p3 = co5 * s2; - p4 = si5 * r2; - pSrc[2 * i5] = p1 + p2; - pSrc[2 * i5 + 1] = p3 - p4; - p1 = co3 * r1; - p2 = si3 * s1; - p3 = co3 * s1; - p4 = si3 * r1; - pSrc[2 * i3] = p1 + p2; - pSrc[2 * i3 + 1] = p3 - p4; - p1 = co7 * t1; - p2 = si7 * t2; - p3 = co7 * t2; - p4 = si7 * t1; - pSrc[2 * i7] = p1 + p2; - pSrc[2 * i7 + 1] = p3 - p4; - r1 = (r6 - r8) * C81; - r6 = (r6 + r8) * C81; - s1 = (s6 - s8) * C81; - s6 = (s6 + s8) * C81; - t1 = r5 - r1; - r5 = r5 + r1; - r8 = r7 - r6; - r7 = r7 + r6; - t2 = s5 - s1; - s5 = s5 + s1; - s8 = s7 - s6; - s7 = s7 + s6; - r1 = r5 + s7; - r5 = r5 - s7; - r6 = t1 + s8; - t1 = t1 - s8; - s1 = s5 - r7; - s5 = s5 + r7; - s6 = t2 - r8; - t2 = t2 + r8; - p1 = co2 * r1; - p2 = si2 * s1; - p3 = co2 * s1; - p4 = si2 * r1; - pSrc[2 * i2] = p1 + p2; - pSrc[2 * i2 + 1] = p3 - p4; - p1 = co8 * r5; - p2 = si8 * s5; - p3 = co8 * s5; - p4 = si8 * r5; - pSrc[2 * i8] = p1 + p2; - pSrc[2 * i8 + 1] = p3 - p4; - p1 = co6 * r6; - p2 = si6 * s6; - p3 = co6 * s6; - p4 = si6 * r6; - pSrc[2 * i6] = p1 + p2; - pSrc[2 * i6 + 1] = p3 - p4; - p1 = co4 * t1; - p2 = si4 * t2; - p3 = co4 * t2; - p4 = si4 * t1; - pSrc[2 * i4] = p1 + p2; - pSrc[2 * i4 + 1] = p3 - p4; - - i1 += n1; - } while (i1 < fftLen); - - j++; - } while (j < n2); - - twidCoefModifier <<= 3; - } while (n2 > 7); -} +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_radix8_f32.c
+ * Description: Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+
+/* ----------------------------------------------------------------------
+ * Internal helper function used by the FFTs
+ * -------------------------------------------------------------------- */
+
+/**
+ brief Core function for the floating-point CFFT butterfly process.
+ param[in,out] pSrc points to the in-place buffer of floating-point data type.
+ param[in] fftLen length of the FFT.
+ param[in] pCoef points to the twiddle coefficient buffer.
+ param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+ return none
+*/
+
+void arm_radix8_butterfly_f32(
+ float32_t * pSrc,
+ uint16_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier)
+{
+ uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
+ uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
+ uint32_t id;
+ uint32_t n1, n2, j;
+
+ float32_t r1, r2, r3, r4, r5, r6, r7, r8;
+ float32_t t1, t2;
+ float32_t s1, s2, s3, s4, s5, s6, s7, s8;
+ float32_t p1, p2, p3, p4;
+ float32_t co2, co3, co4, co5, co6, co7, co8;
+ float32_t si2, si3, si4, si5, si6, si7, si8;
+ const float32_t C81 = 0.70710678118f;
+
+ n2 = fftLen;
+
+ do
+ {
+ n1 = n2;
+ n2 = n2 >> 3;
+ i1 = 0;
+
+ do
+ {
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+ i4 = i3 + n2;
+ i5 = i4 + n2;
+ i6 = i5 + n2;
+ i7 = i6 + n2;
+ i8 = i7 + n2;
+ r1 = pSrc[2 * i1] + pSrc[2 * i5];
+ r5 = pSrc[2 * i1] - pSrc[2 * i5];
+ r2 = pSrc[2 * i2] + pSrc[2 * i6];
+ r6 = pSrc[2 * i2] - pSrc[2 * i6];
+ r3 = pSrc[2 * i3] + pSrc[2 * i7];
+ r7 = pSrc[2 * i3] - pSrc[2 * i7];
+ r4 = pSrc[2 * i4] + pSrc[2 * i8];
+ r8 = pSrc[2 * i4] - pSrc[2 * i8];
+ t1 = r1 - r3;
+ r1 = r1 + r3;
+ r3 = r2 - r4;
+ r2 = r2 + r4;
+ pSrc[2 * i1] = r1 + r2;
+ pSrc[2 * i5] = r1 - r2;
+ r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
+ s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
+ r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
+ s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
+ s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
+ s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
+ r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
+ s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
+ t2 = r1 - s3;
+ r1 = r1 + s3;
+ s3 = r2 - r4;
+ r2 = r2 + r4;
+ pSrc[2 * i1 + 1] = r1 + r2;
+ pSrc[2 * i5 + 1] = r1 - r2;
+ pSrc[2 * i3] = t1 + s3;
+ pSrc[2 * i7] = t1 - s3;
+ pSrc[2 * i3 + 1] = t2 - r3;
+ pSrc[2 * i7 + 1] = t2 + r3;
+ r1 = (r6 - r8) * C81;
+ r6 = (r6 + r8) * C81;
+ r2 = (s6 - s8) * C81;
+ s6 = (s6 + s8) * C81;
+ t1 = r5 - r1;
+ r5 = r5 + r1;
+ r8 = r7 - r6;
+ r7 = r7 + r6;
+ t2 = s5 - r2;
+ s5 = s5 + r2;
+ s8 = s7 - s6;
+ s7 = s7 + s6;
+ pSrc[2 * i2] = r5 + s7;
+ pSrc[2 * i8] = r5 - s7;
+ pSrc[2 * i6] = t1 + s8;
+ pSrc[2 * i4] = t1 - s8;
+ pSrc[2 * i2 + 1] = s5 - r7;
+ pSrc[2 * i8 + 1] = s5 + r7;
+ pSrc[2 * i6 + 1] = t2 - r8;
+ pSrc[2 * i4 + 1] = t2 + r8;
+
+ i1 += n1;
+ } while (i1 < fftLen);
+
+ if (n2 < 8)
+ break;
+
+ ia1 = 0;
+ j = 1;
+
+ do
+ {
+ /* index calculation for the coefficients */
+ id = ia1 + twidCoefModifier;
+ ia1 = id;
+ ia2 = ia1 + id;
+ ia3 = ia2 + id;
+ ia4 = ia3 + id;
+ ia5 = ia4 + id;
+ ia6 = ia5 + id;
+ ia7 = ia6 + id;
+
+ co2 = pCoef[2 * ia1];
+ co3 = pCoef[2 * ia2];
+ co4 = pCoef[2 * ia3];
+ co5 = pCoef[2 * ia4];
+ co6 = pCoef[2 * ia5];
+ co7 = pCoef[2 * ia6];
+ co8 = pCoef[2 * ia7];
+ si2 = pCoef[2 * ia1 + 1];
+ si3 = pCoef[2 * ia2 + 1];
+ si4 = pCoef[2 * ia3 + 1];
+ si5 = pCoef[2 * ia4 + 1];
+ si6 = pCoef[2 * ia5 + 1];
+ si7 = pCoef[2 * ia6 + 1];
+ si8 = pCoef[2 * ia7 + 1];
+
+ i1 = j;
+
+ do
+ {
+ /* index calculation for the input */
+ i2 = i1 + n2;
+ i3 = i2 + n2;
+ i4 = i3 + n2;
+ i5 = i4 + n2;
+ i6 = i5 + n2;
+ i7 = i6 + n2;
+ i8 = i7 + n2;
+ r1 = pSrc[2 * i1] + pSrc[2 * i5];
+ r5 = pSrc[2 * i1] - pSrc[2 * i5];
+ r2 = pSrc[2 * i2] + pSrc[2 * i6];
+ r6 = pSrc[2 * i2] - pSrc[2 * i6];
+ r3 = pSrc[2 * i3] + pSrc[2 * i7];
+ r7 = pSrc[2 * i3] - pSrc[2 * i7];
+ r4 = pSrc[2 * i4] + pSrc[2 * i8];
+ r8 = pSrc[2 * i4] - pSrc[2 * i8];
+ t1 = r1 - r3;
+ r1 = r1 + r3;
+ r3 = r2 - r4;
+ r2 = r2 + r4;
+ pSrc[2 * i1] = r1 + r2;
+ r2 = r1 - r2;
+ s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
+ s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
+ s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
+ s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
+ s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
+ s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
+ s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
+ s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
+ t2 = s1 - s3;
+ s1 = s1 + s3;
+ s3 = s2 - s4;
+ s2 = s2 + s4;
+ r1 = t1 + s3;
+ t1 = t1 - s3;
+ pSrc[2 * i1 + 1] = s1 + s2;
+ s2 = s1 - s2;
+ s1 = t2 - r3;
+ t2 = t2 + r3;
+ p1 = co5 * r2;
+ p2 = si5 * s2;
+ p3 = co5 * s2;
+ p4 = si5 * r2;
+ pSrc[2 * i5] = p1 + p2;
+ pSrc[2 * i5 + 1] = p3 - p4;
+ p1 = co3 * r1;
+ p2 = si3 * s1;
+ p3 = co3 * s1;
+ p4 = si3 * r1;
+ pSrc[2 * i3] = p1 + p2;
+ pSrc[2 * i3 + 1] = p3 - p4;
+ p1 = co7 * t1;
+ p2 = si7 * t2;
+ p3 = co7 * t2;
+ p4 = si7 * t1;
+ pSrc[2 * i7] = p1 + p2;
+ pSrc[2 * i7 + 1] = p3 - p4;
+ r1 = (r6 - r8) * C81;
+ r6 = (r6 + r8) * C81;
+ s1 = (s6 - s8) * C81;
+ s6 = (s6 + s8) * C81;
+ t1 = r5 - r1;
+ r5 = r5 + r1;
+ r8 = r7 - r6;
+ r7 = r7 + r6;
+ t2 = s5 - s1;
+ s5 = s5 + s1;
+ s8 = s7 - s6;
+ s7 = s7 + s6;
+ r1 = r5 + s7;
+ r5 = r5 - s7;
+ r6 = t1 + s8;
+ t1 = t1 - s8;
+ s1 = s5 - r7;
+ s5 = s5 + r7;
+ s6 = t2 - r8;
+ t2 = t2 + r8;
+ p1 = co2 * r1;
+ p2 = si2 * s1;
+ p3 = co2 * s1;
+ p4 = si2 * r1;
+ pSrc[2 * i2] = p1 + p2;
+ pSrc[2 * i2 + 1] = p3 - p4;
+ p1 = co8 * r5;
+ p2 = si8 * s5;
+ p3 = co8 * s5;
+ p4 = si8 * r5;
+ pSrc[2 * i8] = p1 + p2;
+ pSrc[2 * i8 + 1] = p3 - p4;
+ p1 = co6 * r6;
+ p2 = si6 * s6;
+ p3 = co6 * s6;
+ p4 = si6 * r6;
+ pSrc[2 * i6] = p1 + p2;
+ pSrc[2 * i6 + 1] = p3 - p4;
+ p1 = co4 * t1;
+ p2 = si4 * t2;
+ p3 = co4 * t2;
+ p4 = si4 * t1;
+ pSrc[2 * i4] = p1 + p2;
+ pSrc[2 * i4 + 1] = p3 - p4;
+
+ i1 += n1;
+ } while (i1 < fftLen);
+
+ j++;
+ } while (j < n2);
+
+ twidCoefModifier <<= 3;
+ } while (n2 > 7);
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_f32.c index b9dff3f..729ebf1 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_f32.c @@ -1,448 +1,448 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_dct4_f32.c - * Description: Processing function of DCT4 & IDCT4 F32 - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -/** - @ingroup groupTransforms - */ - -/** - @defgroup DCT4_IDCT4 DCT Type IV Functions - - Representation of signals by minimum number of values is important for storage and transmission. - The possibility of large discontinuity between the beginning and end of a period of a signal - in DFT can be avoided by extending the signal so that it is even-symmetric. - Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the - spectrum and is very widely used in signal and image coding applications. - The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions. - DCT has an excellent energy-packing capability, hence has many applications and in data compression in particular. - - DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal. - Reordering of the input data makes the computation of DCT just a problem of - computing the DFT of a real signal with a few additional operations. - This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations. - - DCT type-II can be implemented using Fast fourier transform (FFT) internally, as the transform is applied on real values, Real FFT can be used. - DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing. - DCT2 implementation can be described in the following steps: - - Re-ordering input - - Calculating Real FFT - - Multiplication of weights and Real FFT output and getting real part from the product. - - This process is explained by the block diagram below: - \image html DCT4.gif "Discrete Cosine Transform - type-IV" - - @par Algorithm - The N-point type-IV DCT is defined as a real, linear transformation by the formula: - \image html DCT4Equation.gif - where <code>k = 0, 1, 2, ..., N-1</code> - @par - Its inverse is defined as follows: - \image html IDCT4Equation.gif - where <code>n = 0, 1, 2, ..., N-1</code> - @par - The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N). - The symmetry of the transform matrix indicates that the fast algorithms for the forward - and inverse transform computation are identical. - Note that the implementation of Inverse DCT4 and DCT4 is same, hence same process function can be used for both. - - @par Lengths supported by the transform: - As DCT4 internally uses Real FFT, it supports all the lengths 128, 512, 2048 and 8192. - The library provides separate functions for Q15, Q31, and floating-point data types. - - @par Instance Structure - The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure. - A separate instance structure must be defined for each transform. - There are separate instance structure declarations for each of the 3 supported data types. - - @par Initialization Functions - There is also an associated initialization function for each data type. - The initialization function performs the following operations: - - Sets the values of the internal structure fields. - - Initializes Real FFT as its process function is used internally in DCT4, by calling \ref arm_rfft_init_f32(). - @par - Use of the initialization function is optional. - However, if the initialization function is used, then the instance structure cannot be placed into a const data section. - To place an instance structure into a const data section, the instance structure must be manually initialized. - Manually initialize the instance structure as follows: - <pre> - arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft}; - arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft}; - arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft}; - </pre> - where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4; - \c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>; - \c pTwiddle points to the twiddle factor table; - \c pCosFactor points to the cosFactor table; - \c pRfft points to the real FFT instance; - \c pCfft points to the complex FFT instance; - The CFFT and RFFT structures also needs to be initialized, refer to arm_cfft_radix4_f32() - and arm_rfft_f32() respectively for details regarding static initialization. - - @par Fixed-Point Behavior - Care must be taken when using the fixed-point versions of the DCT4 transform functions. - In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. - Refer to the function specific documentation below for usage guidelines. - */ - - /** - @addtogroup DCT4_IDCT4 - @{ - */ - -/** - @brief Processing function for the floating-point DCT4/IDCT4. - @param[in] S points to an instance of the floating-point DCT4/IDCT4 structure - @param[in] pState points to state buffer - @param[in,out] pInlineBuffer points to the in-place input and output buffer - @return none - */ - -void arm_dct4_f32( - const arm_dct4_instance_f32 * S, - float32_t * pState, - float32_t * pInlineBuffer) -{ - const float32_t *weights = S->pTwiddle; /* Pointer to the Weights table */ - const float32_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */ - float32_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */ - float32_t in; /* Temporary variable */ - uint32_t i; /* Loop counter */ - - - /* DCT4 computation involves DCT2 (which is calculated using RFFT) - * along with some pre-processing and post-processing. - * Computational procedure is explained as follows: - * (a) Pre-processing involves multiplying input with cos factor, - * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n)) - * where, - * r(n) -- output of preprocessing - * u(n) -- input to preprocessing(actual Source buffer) - * (b) Calculation of DCT2 using FFT is divided into three steps: - * Step1: Re-ordering of even and odd elements of input. - * Step2: Calculating FFT of the re-ordered input. - * Step3: Taking the real part of the product of FFT output and weights. - * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation: - * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) - * where, - * Y4 -- DCT4 output, Y2 -- DCT2 output - * (d) Multiplying the output with the normalizing factor sqrt(2/N). - */ - - /*-------- Pre-processing ------------*/ - /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */ - arm_scale_f32(pInlineBuffer, 2.0f, pInlineBuffer, S->N); - arm_mult_f32(pInlineBuffer, cosFact, pInlineBuffer, S->N); - - /* ---------------------------------------------------------------- - * Step1: Re-ordering of even and odd elements as - * pState[i] = pInlineBuffer[2*i] and - * pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2 - ---------------------------------------------------------------------*/ - - /* pS1 initialized to pState */ - pS1 = pState; - - /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */ - pS2 = pState + (S->N - 1U); - - /* pbuff initialized to input buffer */ - pbuff = pInlineBuffer; - - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */ - i = S->Nby2 >> 2U; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - do - { - /* Re-ordering of even and odd elements */ - /* pState[i] = pInlineBuffer[2*i] */ - *pS1++ = *pbuff++; - /* pState[N-i-1] = pInlineBuffer[2*i+1] */ - *pS2-- = *pbuff++; - - *pS1++ = *pbuff++; - *pS2-- = *pbuff++; - - *pS1++ = *pbuff++; - *pS2-- = *pbuff++; - - *pS1++ = *pbuff++; - *pS2-- = *pbuff++; - - /* Decrement loop counter */ - i--; - } while (i > 0U); - - /* pbuff initialized to input buffer */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Initializing the loop counter to N/4 instead of N for loop unrolling */ - i = S->N >> 2U; - - /* Processing with loop unrolling 4 times as N is always multiple of 4. - * Compute 4 outputs at a time */ - do - { - /* Writing the re-ordered output back to inplace input buffer */ - *pbuff++ = *pS1++; - *pbuff++ = *pS1++; - *pbuff++ = *pS1++; - *pbuff++ = *pS1++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - - /* --------------------------------------------------------- - * Step2: Calculate RFFT for N-point input - * ---------------------------------------------------------- */ - /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ - arm_rfft_f32 (S->pRfft, pInlineBuffer, pState); - - /*---------------------------------------------------------------------- - * Step3: Multiply the FFT output with the weights. - *----------------------------------------------------------------------*/ - arm_cmplx_mult_cmplx_f32 (pState, weights, pState, S->N); - - /* ----------- Post-processing ---------- */ - /* DCT-IV can be obtained from DCT-II by the equation, - * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) - * Hence, Y4(0) = Y2(0)/2 */ - /* Getting only real part from the output and Converting to DCT-IV */ - - /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */ - i = (S->N - 1U) >> 2U; - - /* pbuff initialized to input buffer. */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ - in = *pS1++ * (float32_t) 0.5; - /* input buffer acts as inplace, so output values are stored in the input itself. */ - *pbuff++ = in; - - /* pState pointer is incremented twice as the real values are located alternatively in the array */ - pS1++; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - do - { - /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ - /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ - in = *pS1++ - in; - *pbuff++ = in; - /* points to the next real value */ - pS1++; - - in = *pS1++ - in; - *pbuff++ = in; - pS1++; - - in = *pS1++ - in; - *pbuff++ = in; - pS1++; - - in = *pS1++ - in; - *pbuff++ = in; - pS1++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - i = (S->N - 1U) % 0x4U; - - while (i > 0U) - { - /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ - /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ - in = *pS1++ - in; - *pbuff++ = in; - - /* points to the next real value */ - pS1++; - - /* Decrement the loop counter */ - i--; - } - - - /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ - - /* Initializing the loop counter to N/4 instead of N for loop unrolling */ - i = S->N >> 2U; - - /* pbuff initialized to the pInlineBuffer(now contains the output values) */ - pbuff = pInlineBuffer; - - /* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */ - do - { - /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */ - in = *pbuff; - *pbuff++ = in * S->normalize; - - in = *pbuff; - *pbuff++ = in * S->normalize; - - in = *pbuff; - *pbuff++ = in * S->normalize; - - in = *pbuff; - *pbuff++ = in * S->normalize; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - -#else - - /* Initializing the loop counter to N/2 */ - i = S->Nby2; - - do - { - /* Re-ordering of even and odd elements */ - /* pState[i] = pInlineBuffer[2*i] */ - *pS1++ = *pbuff++; - /* pState[N-i-1] = pInlineBuffer[2*i+1] */ - *pS2-- = *pbuff++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - /* pbuff initialized to input buffer */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Initializing the loop counter */ - i = S->N; - - do - { - /* Writing the re-ordered output back to inplace input buffer */ - *pbuff++ = *pS1++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - - /* --------------------------------------------------------- - * Step2: Calculate RFFT for N-point input - * ---------------------------------------------------------- */ - /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ - arm_rfft_f32 (S->pRfft, pInlineBuffer, pState); - - /*---------------------------------------------------------------------- - * Step3: Multiply the FFT output with the weights. - *----------------------------------------------------------------------*/ - arm_cmplx_mult_cmplx_f32 (pState, weights, pState, S->N); - - /* ----------- Post-processing ---------- */ - /* DCT-IV can be obtained from DCT-II by the equation, - * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) - * Hence, Y4(0) = Y2(0)/2 */ - /* Getting only real part from the output and Converting to DCT-IV */ - - /* pbuff initialized to input buffer. */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ - in = *pS1++ * (float32_t) 0.5; - /* input buffer acts as inplace, so output values are stored in the input itself. */ - *pbuff++ = in; - - /* pState pointer is incremented twice as the real values are located alternatively in the array */ - pS1++; - - /* Initializing the loop counter */ - i = (S->N - 1U); - - do - { - /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ - /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ - in = *pS1++ - in; - *pbuff++ = in; - - /* points to the next real value */ - pS1++; - - /* Decrement loop counter */ - i--; - } while (i > 0U); - - /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ - - /* Initializing loop counter */ - i = S->N; - - /* pbuff initialized to the pInlineBuffer (now contains the output values) */ - pbuff = pInlineBuffer; - - do - { - /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */ - in = *pbuff; - *pbuff++ = in * S->normalize; - - /* Decrement loop counter */ - i--; - } while (i > 0U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - -} - -/** - @} end of DCT4_IDCT4 group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_dct4_f32.c
+ * Description: Processing function of DCT4 & IDCT4 F32
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @defgroup DCT4_IDCT4 DCT Type IV Functions
+
+ Representation of signals by minimum number of values is important for storage and transmission.
+ The possibility of large discontinuity between the beginning and end of a period of a signal
+ in DFT can be avoided by extending the signal so that it is even-symmetric.
+ Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the
+ spectrum and is very widely used in signal and image coding applications.
+ The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions.
+ DCT has an excellent energy-packing capability, hence has many applications and in data compression in particular.
+
+ DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal.
+ Reordering of the input data makes the computation of DCT just a problem of
+ computing the DFT of a real signal with a few additional operations.
+ This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations.
+
+ DCT type-II can be implemented using Fast fourier transform (FFT) internally, as the transform is applied on real values, Real FFT can be used.
+ DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing.
+ DCT2 implementation can be described in the following steps:
+ - Re-ordering input
+ - Calculating Real FFT
+ - Multiplication of weights and Real FFT output and getting real part from the product.
+
+ This process is explained by the block diagram below:
+ \image html DCT4.gif "Discrete Cosine Transform - type-IV"
+
+ @par Algorithm
+ The N-point type-IV DCT is defined as a real, linear transformation by the formula:
+ \image html DCT4Equation.gif
+ where <code>k = 0, 1, 2, ..., N-1</code>
+ @par
+ Its inverse is defined as follows:
+ \image html IDCT4Equation.gif
+ where <code>n = 0, 1, 2, ..., N-1</code>
+ @par
+ The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).
+ The symmetry of the transform matrix indicates that the fast algorithms for the forward
+ and inverse transform computation are identical.
+ Note that the implementation of Inverse DCT4 and DCT4 is same, hence same process function can be used for both.
+
+ @par Lengths supported by the transform:
+ As DCT4 internally uses Real FFT, it supports all the lengths 128, 512, 2048 and 8192.
+ The library provides separate functions for Q15, Q31, and floating-point data types.
+
+ @par Instance Structure
+ The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure.
+ A separate instance structure must be defined for each transform.
+ There are separate instance structure declarations for each of the 3 supported data types.
+
+ @par Initialization Functions
+ There is also an associated initialization function for each data type.
+ The initialization function performs the following operations:
+ - Sets the values of the internal structure fields.
+ - Initializes Real FFT as its process function is used internally in DCT4, by calling \ref arm_rfft_init_f32().
+ @par
+ Use of the initialization function is optional.
+ However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
+ To place an instance structure into a const data section, the instance structure must be manually initialized.
+ Manually initialize the instance structure as follows:
+ <pre>
+ arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
+ arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
+ arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
+ </pre>
+ where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4;
+ \c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>;
+ \c pTwiddle points to the twiddle factor table;
+ \c pCosFactor points to the cosFactor table;
+ \c pRfft points to the real FFT instance;
+ \c pCfft points to the complex FFT instance;
+ The CFFT and RFFT structures also needs to be initialized, refer to arm_cfft_radix4_f32()
+ and arm_rfft_f32() respectively for details regarding static initialization.
+
+ @par Fixed-Point Behavior
+ Care must be taken when using the fixed-point versions of the DCT4 transform functions.
+ In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
+ Refer to the function specific documentation below for usage guidelines.
+ */
+
+ /**
+ @addtogroup DCT4_IDCT4
+ @{
+ */
+
+/**
+ @brief Processing function for the floating-point DCT4/IDCT4.
+ @param[in] S points to an instance of the floating-point DCT4/IDCT4 structure
+ @param[in] pState points to state buffer
+ @param[in,out] pInlineBuffer points to the in-place input and output buffer
+ @return none
+ */
+
+void arm_dct4_f32(
+ const arm_dct4_instance_f32 * S,
+ float32_t * pState,
+ float32_t * pInlineBuffer)
+{
+ const float32_t *weights = S->pTwiddle; /* Pointer to the Weights table */
+ const float32_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
+ float32_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
+ float32_t in; /* Temporary variable */
+ uint32_t i; /* Loop counter */
+
+
+ /* DCT4 computation involves DCT2 (which is calculated using RFFT)
+ * along with some pre-processing and post-processing.
+ * Computational procedure is explained as follows:
+ * (a) Pre-processing involves multiplying input with cos factor,
+ * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
+ * where,
+ * r(n) -- output of preprocessing
+ * u(n) -- input to preprocessing(actual Source buffer)
+ * (b) Calculation of DCT2 using FFT is divided into three steps:
+ * Step1: Re-ordering of even and odd elements of input.
+ * Step2: Calculating FFT of the re-ordered input.
+ * Step3: Taking the real part of the product of FFT output and weights.
+ * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
+ * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
+ * where,
+ * Y4 -- DCT4 output, Y2 -- DCT2 output
+ * (d) Multiplying the output with the normalizing factor sqrt(2/N).
+ */
+
+ /*-------- Pre-processing ------------*/
+ /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
+ arm_scale_f32(pInlineBuffer, 2.0f, pInlineBuffer, S->N);
+ arm_mult_f32(pInlineBuffer, cosFact, pInlineBuffer, S->N);
+
+ /* ----------------------------------------------------------------
+ * Step1: Re-ordering of even and odd elements as
+ * pState[i] = pInlineBuffer[2*i] and
+ * pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
+ ---------------------------------------------------------------------*/
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
+ pS2 = pState + (S->N - 1U);
+
+ /* pbuff initialized to input buffer */
+ pbuff = pInlineBuffer;
+
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
+ i = S->Nby2 >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ do
+ {
+ /* Re-ordering of even and odd elements */
+ /* pState[i] = pInlineBuffer[2*i] */
+ *pS1++ = *pbuff++;
+ /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+ *pS2-- = *pbuff++;
+
+ *pS1++ = *pbuff++;
+ *pS2-- = *pbuff++;
+
+ *pS1++ = *pbuff++;
+ *pS2-- = *pbuff++;
+
+ *pS1++ = *pbuff++;
+ *pS2-- = *pbuff++;
+
+ /* Decrement loop counter */
+ i--;
+ } while (i > 0U);
+
+ /* pbuff initialized to input buffer */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+ i = S->N >> 2U;
+
+ /* Processing with loop unrolling 4 times as N is always multiple of 4.
+ * Compute 4 outputs at a time */
+ do
+ {
+ /* Writing the re-ordered output back to inplace input buffer */
+ *pbuff++ = *pS1++;
+ *pbuff++ = *pS1++;
+ *pbuff++ = *pS1++;
+ *pbuff++ = *pS1++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+
+ /* ---------------------------------------------------------
+ * Step2: Calculate RFFT for N-point input
+ * ---------------------------------------------------------- */
+ /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+ arm_rfft_f32 (S->pRfft, pInlineBuffer, pState);
+
+ /*----------------------------------------------------------------------
+ * Step3: Multiply the FFT output with the weights.
+ *----------------------------------------------------------------------*/
+ arm_cmplx_mult_cmplx_f32 (pState, weights, pState, S->N);
+
+ /* ----------- Post-processing ---------- */
+ /* DCT-IV can be obtained from DCT-II by the equation,
+ * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
+ * Hence, Y4(0) = Y2(0)/2 */
+ /* Getting only real part from the output and Converting to DCT-IV */
+
+ /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
+ i = (S->N - 1U) >> 2U;
+
+ /* pbuff initialized to input buffer. */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+ in = *pS1++ * (float32_t) 0.5;
+ /* input buffer acts as inplace, so output values are stored in the input itself. */
+ *pbuff++ = in;
+
+ /* pState pointer is incremented twice as the real values are located alternatively in the array */
+ pS1++;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ do
+ {
+ /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+ /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ /* points to the next real value */
+ pS1++;
+
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ pS1++;
+
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ pS1++;
+
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ pS1++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ i = (S->N - 1U) % 0x4U;
+
+ while (i > 0U)
+ {
+ /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+ /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+ in = *pS1++ - in;
+ *pbuff++ = in;
+
+ /* points to the next real value */
+ pS1++;
+
+ /* Decrement the loop counter */
+ i--;
+ }
+
+
+ /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+ /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+ i = S->N >> 2U;
+
+ /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+ pbuff = pInlineBuffer;
+
+ /* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
+ do
+ {
+ /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+ in = *pbuff;
+ *pbuff++ = in * S->normalize;
+
+ in = *pbuff;
+ *pbuff++ = in * S->normalize;
+
+ in = *pbuff;
+ *pbuff++ = in * S->normalize;
+
+ in = *pbuff;
+ *pbuff++ = in * S->normalize;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+
+#else
+
+ /* Initializing the loop counter to N/2 */
+ i = S->Nby2;
+
+ do
+ {
+ /* Re-ordering of even and odd elements */
+ /* pState[i] = pInlineBuffer[2*i] */
+ *pS1++ = *pbuff++;
+ /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+ *pS2-- = *pbuff++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+ /* pbuff initialized to input buffer */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Initializing the loop counter */
+ i = S->N;
+
+ do
+ {
+ /* Writing the re-ordered output back to inplace input buffer */
+ *pbuff++ = *pS1++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+
+ /* ---------------------------------------------------------
+ * Step2: Calculate RFFT for N-point input
+ * ---------------------------------------------------------- */
+ /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+ arm_rfft_f32 (S->pRfft, pInlineBuffer, pState);
+
+ /*----------------------------------------------------------------------
+ * Step3: Multiply the FFT output with the weights.
+ *----------------------------------------------------------------------*/
+ arm_cmplx_mult_cmplx_f32 (pState, weights, pState, S->N);
+
+ /* ----------- Post-processing ---------- */
+ /* DCT-IV can be obtained from DCT-II by the equation,
+ * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
+ * Hence, Y4(0) = Y2(0)/2 */
+ /* Getting only real part from the output and Converting to DCT-IV */
+
+ /* pbuff initialized to input buffer. */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+ in = *pS1++ * (float32_t) 0.5;
+ /* input buffer acts as inplace, so output values are stored in the input itself. */
+ *pbuff++ = in;
+
+ /* pState pointer is incremented twice as the real values are located alternatively in the array */
+ pS1++;
+
+ /* Initializing the loop counter */
+ i = (S->N - 1U);
+
+ do
+ {
+ /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+ /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+ in = *pS1++ - in;
+ *pbuff++ = in;
+
+ /* points to the next real value */
+ pS1++;
+
+ /* Decrement loop counter */
+ i--;
+ } while (i > 0U);
+
+ /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+ /* Initializing loop counter */
+ i = S->N;
+
+ /* pbuff initialized to the pInlineBuffer (now contains the output values) */
+ pbuff = pInlineBuffer;
+
+ do
+ {
+ /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+ in = *pbuff;
+ *pbuff++ = in * S->normalize;
+
+ /* Decrement loop counter */
+ i--;
+ } while (i > 0U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+}
+
+/**
+ @} end of DCT4_IDCT4 group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_f32.c index e1d80c0..7522454 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_f32.c @@ -1,130 +1,131 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_dct4_init_f32.c - * Description: Initialization function of DCT-4 & IDCT4 F32 - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -/** - @ingroup groupTransforms - */ - - /** - @addtogroup DCT4_IDCT4 - @{ - */ - -/** - @brief Initialization function for the floating-point DCT4/IDCT4. - @param[in,out] S points to an instance of floating-point DCT4/IDCT4 structure - @param[in] S_RFFT points to an instance of floating-point RFFT/RIFFT structure - @param[in] S_CFFT points to an instance of floating-point CFFT/CIFFT structure - @param[in] N length of the DCT4 - @param[in] Nby2 half of the length of the DCT4 - @param[in] normalize normalizing factor. - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length - - @par Normalizing factor - The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>. - Floating-point normalizing factors are mentioned in the table below for different DCT sizes: - - \image html dct4NormalizingF32Table.gif - */ - -arm_status arm_dct4_init_f32( - arm_dct4_instance_f32 * S, - arm_rfft_instance_f32 * S_RFFT, - arm_cfft_radix4_instance_f32 * S_CFFT, - uint16_t N, - uint16_t Nby2, - float32_t normalize) -{ - /* Initialize the default arm status */ - arm_status status = ARM_MATH_SUCCESS; - - - /* Initialize the DCT4 length */ - S->N = N; - - /* Initialize the half of DCT4 length */ - S->Nby2 = Nby2; - - /* Initialize the DCT4 Normalizing factor */ - S->normalize = normalize; - - /* Initialize Real FFT Instance */ - S->pRfft = S_RFFT; - - /* Initialize Complex FFT Instance */ - S->pCfft = S_CFFT; - - switch (N) - { - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_8192) - /* Initialize the table modifier values */ - case 8192U: - S->pTwiddle = Weights_8192; - S->pCosFactor = cos_factors_8192; - break; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_2048) - case 2048U: - S->pTwiddle = Weights_2048; - S->pCosFactor = cos_factors_2048; - break; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_512) - case 512U: - S->pTwiddle = Weights_512; - S->pCosFactor = cos_factors_512; - break; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_128) - case 128U: - S->pTwiddle = Weights_128; - S->pCosFactor = cos_factors_128; - break; - #endif - default: - status = ARM_MATH_ARGUMENT_ERROR; - } - - /* Initialize the RFFT/RIFFT Function */ - arm_rfft_init_f32(S->pRfft, S->pCfft, S->N, 0U, 1U); - - /* return the status of DCT4 Init function */ - return (status); -} - -/** - @} end of DCT4_IDCT4 group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_dct4_init_f32.c
+ * Description: Initialization function of DCT-4 & IDCT4 F32
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @ingroup DCT4_IDCT4
+ */
+
+
+/**
+ @addtogroup DCT4_IDCT4
+ @{
+ */
+
+/**
+ @brief Initialization function for the floating-point DCT4/IDCT4.
+ @param[in,out] S points to an instance of floating-point DCT4/IDCT4 structure
+ @param[in] S_RFFT points to an instance of floating-point RFFT/RIFFT structure
+ @param[in] S_CFFT points to an instance of floating-point CFFT/CIFFT structure
+ @param[in] N length of the DCT4
+ @param[in] Nby2 half of the length of the DCT4
+ @param[in] normalize normalizing factor.
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length
+
+ @par Normalizing factor
+ The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
+ Floating-point normalizing factors are mentioned in the table below for different DCT sizes:
+
+ \image html dct4NormalizingF32Table.gif
+ */
+
+arm_status arm_dct4_init_f32(
+ arm_dct4_instance_f32 * S,
+ arm_rfft_instance_f32 * S_RFFT,
+ arm_cfft_radix4_instance_f32 * S_CFFT,
+ uint16_t N,
+ uint16_t Nby2,
+ float32_t normalize)
+{
+ /* Initialize the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+
+
+ /* Initialize the DCT4 length */
+ S->N = N;
+
+ /* Initialize the half of DCT4 length */
+ S->Nby2 = Nby2;
+
+ /* Initialize the DCT4 Normalizing factor */
+ S->normalize = normalize;
+
+ /* Initialize Real FFT Instance */
+ S->pRfft = S_RFFT;
+
+ /* Initialize Complex FFT Instance */
+ S->pCfft = S_CFFT;
+
+ switch (N)
+ {
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_8192)
+ /* Initialize the table modifier values */
+ case 8192U:
+ S->pTwiddle = Weights_8192;
+ S->pCosFactor = cos_factors_8192;
+ break;
+ #endif
+
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_2048)
+ case 2048U:
+ S->pTwiddle = Weights_2048;
+ S->pCosFactor = cos_factors_2048;
+ break;
+ #endif
+
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_512)
+ case 512U:
+ S->pTwiddle = Weights_512;
+ S->pCosFactor = cos_factors_512;
+ break;
+ #endif
+
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_128)
+ case 128U:
+ S->pTwiddle = Weights_128;
+ S->pCosFactor = cos_factors_128;
+ break;
+ #endif
+ default:
+ status = ARM_MATH_ARGUMENT_ERROR;
+ }
+
+ /* Initialize the RFFT/RIFFT Function */
+ arm_rfft_init_f32(S->pRfft, S->pCfft, S->N, 0U, 1U);
+
+ /* return the status of DCT4 Init function */
+ return (status);
+}
+
+/**
+ @} end of DCT4_IDCT4 group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q15.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q15.c index 5390da3..c5d834a 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q15.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q15.c @@ -1,130 +1,130 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_dct4_init_q15.c - * Description: Initialization function of DCT-4 & IDCT4 Q15 - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -/** - @ingroup groupTransforms - */ - - /** - @addtogroup DCT4_IDCT4 - @{ - */ - -/** - @brief Initialization function for the Q15 DCT4/IDCT4. - @param[in,out] S points to an instance of Q15 DCT4/IDCT4 structure - @param[in] S_RFFT points to an instance of Q15 RFFT/RIFFT structure - @param[in] S_CFFT points to an instance of Q15 CFFT/CIFFT structure - @param[in] N length of the DCT4 - @param[in] Nby2 half of the length of the DCT4 - @param[in] normalize normalizing factor - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length - - @par Normalizing factor - The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>. - Normalizing factors in 1.15 format are mentioned in the table below for different DCT sizes: - - \image html dct4NormalizingQ15Table.gif - */ - -arm_status arm_dct4_init_q15( - arm_dct4_instance_q15 * S, - arm_rfft_instance_q15 * S_RFFT, - arm_cfft_radix4_instance_q15 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q15_t normalize) -{ - /* Initialise the default arm status */ - arm_status status = ARM_MATH_SUCCESS; - - /* Initialize the DCT4 length */ - S->N = N; - - /* Initialize the half of DCT4 length */ - S->Nby2 = Nby2; - - /* Initialize the DCT4 Normalizing factor */ - S->normalize = normalize; - - /* Initialize Real FFT Instance */ - S->pRfft = S_RFFT; - - /* Initialize Complex FFT Instance */ - S->pCfft = S_CFFT; - - switch (N) - { - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_8192) - /* Initialize the table modifier values */ - case 8192U: - S->pTwiddle = WeightsQ15_8192; - S->pCosFactor = cos_factorsQ15_8192; - break; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_2048) - case 2048U: - S->pTwiddle = WeightsQ15_2048; - S->pCosFactor = cos_factorsQ15_2048; - break; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_512) - case 512U: - S->pTwiddle = WeightsQ15_512; - S->pCosFactor = cos_factorsQ15_512; - break; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_128) - case 128U: - S->pTwiddle = WeightsQ15_128; - S->pCosFactor = cos_factorsQ15_128; - break; - #endif - - default: - status = ARM_MATH_ARGUMENT_ERROR; - } - - /* Initialize the RFFT/RIFFT */ - arm_rfft_init_q15(S->pRfft, S->N, 0U, 1U); - - /* return the status of DCT4 Init function */ - return (status); -} - -/** - @} end of DCT4_IDCT4 group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_dct4_init_q15.c
+ * Description: Initialization function of DCT-4 & IDCT4 Q15
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @ingroup DCT4_IDCT4
+ */
+
+/**
+ @addtogroup DCT4_IDCT4
+ @{
+ */
+
+/**
+ @brief Initialization function for the Q15 DCT4/IDCT4.
+ @param[in,out] S points to an instance of Q15 DCT4/IDCT4 structure
+ @param[in] S_RFFT points to an instance of Q15 RFFT/RIFFT structure
+ @param[in] S_CFFT points to an instance of Q15 CFFT/CIFFT structure
+ @param[in] N length of the DCT4
+ @param[in] Nby2 half of the length of the DCT4
+ @param[in] normalize normalizing factor
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length
+
+ @par Normalizing factor
+ The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
+ Normalizing factors in 1.15 format are mentioned in the table below for different DCT sizes:
+
+ \image html dct4NormalizingQ15Table.gif
+ */
+
+arm_status arm_dct4_init_q15(
+ arm_dct4_instance_q15 * S,
+ arm_rfft_instance_q15 * S_RFFT,
+ arm_cfft_radix4_instance_q15 * S_CFFT,
+ uint16_t N,
+ uint16_t Nby2,
+ q15_t normalize)
+{
+ /* Initialise the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+
+ /* Initialize the DCT4 length */
+ S->N = N;
+
+ /* Initialize the half of DCT4 length */
+ S->Nby2 = Nby2;
+
+ /* Initialize the DCT4 Normalizing factor */
+ S->normalize = normalize;
+
+ /* Initialize Real FFT Instance */
+ S->pRfft = S_RFFT;
+
+ /* Initialize Complex FFT Instance */
+ S->pCfft = S_CFFT;
+
+ switch (N)
+ {
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_8192)
+ /* Initialize the table modifier values */
+ case 8192U:
+ S->pTwiddle = WeightsQ15_8192;
+ S->pCosFactor = cos_factorsQ15_8192;
+ break;
+ #endif
+
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_2048)
+ case 2048U:
+ S->pTwiddle = WeightsQ15_2048;
+ S->pCosFactor = cos_factorsQ15_2048;
+ break;
+ #endif
+
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_512)
+ case 512U:
+ S->pTwiddle = WeightsQ15_512;
+ S->pCosFactor = cos_factorsQ15_512;
+ break;
+ #endif
+
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_128)
+ case 128U:
+ S->pTwiddle = WeightsQ15_128;
+ S->pCosFactor = cos_factorsQ15_128;
+ break;
+ #endif
+
+ default:
+ status = ARM_MATH_ARGUMENT_ERROR;
+ }
+
+ /* Initialize the RFFT/RIFFT */
+ arm_rfft_init_q15(S->pRfft, S->N, 0U, 1U);
+
+ /* return the status of DCT4 Init function */
+ return (status);
+}
+
+/**
+ @} end of DCT4_IDCT4 group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q31.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q31.c index 4c7622a..c0294d7 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q31.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q31.c @@ -1,129 +1,130 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_dct4_init_q31.c - * Description: Initialization function of DCT-4 & IDCT4 Q31 - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -/** - @ingroup groupTransforms - */ - - /** - @addtogroup DCT4_IDCT4 - @{ - */ - -/** - @brief Initialization function for the Q31 DCT4/IDCT4. - @param[in,out] S points to an instance of Q31 DCT4/IDCT4 structure. - @param[in] S_RFFT points to an instance of Q31 RFFT/RIFFT structure - @param[in] S_CFFT points to an instance of Q31 CFFT/CIFFT structure - @param[in] N length of the DCT4. - @param[in] Nby2 half of the length of the DCT4. - @param[in] normalize normalizing factor. - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length - - @par Normalizing factor: - The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>. - Normalizing factors in 1.31 format are mentioned in the table below for different DCT sizes: - - \image html dct4NormalizingQ31Table.gif - */ - -arm_status arm_dct4_init_q31( - arm_dct4_instance_q31 * S, - arm_rfft_instance_q31 * S_RFFT, - arm_cfft_radix4_instance_q31 * S_CFFT, - uint16_t N, - uint16_t Nby2, - q31_t normalize) -{ - /* Initialize the default arm status */ - arm_status status = ARM_MATH_SUCCESS; - - /* Initialize the DCT4 length */ - S->N = N; - - /* Initialize the half of DCT4 length */ - S->Nby2 = Nby2; - - /* Initialize the DCT4 Normalizing factor */ - S->normalize = normalize; - - /* Initialize Real FFT Instance */ - S->pRfft = S_RFFT; - - /* Initialize Complex FFT Instance */ - S->pCfft = S_CFFT; - - switch (N) - { - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_8192) - /* Initialize the table modifier values */ - case 8192U: - S->pTwiddle = WeightsQ31_8192; - S->pCosFactor = cos_factorsQ31_8192; - break; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_2048) - case 2048U: - S->pTwiddle = WeightsQ31_2048; - S->pCosFactor = cos_factorsQ31_2048; - break; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_512) - case 512U: - S->pTwiddle = WeightsQ31_512; - S->pCosFactor = cos_factorsQ31_512; - break; - #endif - - #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_128) - case 128U: - S->pTwiddle = WeightsQ31_128; - S->pCosFactor = cos_factorsQ31_128; - break; - #endif - default: - status = ARM_MATH_ARGUMENT_ERROR; - } - - /* Initialize the RFFT/RIFFT Function */ - arm_rfft_init_q31(S->pRfft, S->N, 0U, 1U); - - /* return the status of DCT4 Init function */ - return (status); -} - -/** - @} end of DCT4_IDCT4 group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_dct4_init_q31.c
+ * Description: Initialization function of DCT-4 & IDCT4 Q31
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @ingroup DCT4_IDCT4
+ */
+
+
+/**
+ @addtogroup DCT4_IDCT4
+ @{
+ */
+
+/**
+ @brief Initialization function for the Q31 DCT4/IDCT4.
+ @param[in,out] S points to an instance of Q31 DCT4/IDCT4 structure.
+ @param[in] S_RFFT points to an instance of Q31 RFFT/RIFFT structure
+ @param[in] S_CFFT points to an instance of Q31 CFFT/CIFFT structure
+ @param[in] N length of the DCT4.
+ @param[in] Nby2 half of the length of the DCT4.
+ @param[in] normalize normalizing factor.
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length
+
+ @par Normalizing factor:
+ The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
+ Normalizing factors in 1.31 format are mentioned in the table below for different DCT sizes:
+
+ \image html dct4NormalizingQ31Table.gif
+ */
+
+arm_status arm_dct4_init_q31(
+ arm_dct4_instance_q31 * S,
+ arm_rfft_instance_q31 * S_RFFT,
+ arm_cfft_radix4_instance_q31 * S_CFFT,
+ uint16_t N,
+ uint16_t Nby2,
+ q31_t normalize)
+{
+ /* Initialize the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+
+ /* Initialize the DCT4 length */
+ S->N = N;
+
+ /* Initialize the half of DCT4 length */
+ S->Nby2 = Nby2;
+
+ /* Initialize the DCT4 Normalizing factor */
+ S->normalize = normalize;
+
+ /* Initialize Real FFT Instance */
+ S->pRfft = S_RFFT;
+
+ /* Initialize Complex FFT Instance */
+ S->pCfft = S_CFFT;
+
+ switch (N)
+ {
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_8192)
+ /* Initialize the table modifier values */
+ case 8192U:
+ S->pTwiddle = WeightsQ31_8192;
+ S->pCosFactor = cos_factorsQ31_8192;
+ break;
+ #endif
+
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_2048)
+ case 2048U:
+ S->pTwiddle = WeightsQ31_2048;
+ S->pCosFactor = cos_factorsQ31_2048;
+ break;
+ #endif
+
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_512)
+ case 512U:
+ S->pTwiddle = WeightsQ31_512;
+ S->pCosFactor = cos_factorsQ31_512;
+ break;
+ #endif
+
+ #if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_128)
+ case 128U:
+ S->pTwiddle = WeightsQ31_128;
+ S->pCosFactor = cos_factorsQ31_128;
+ break;
+ #endif
+ default:
+ status = ARM_MATH_ARGUMENT_ERROR;
+ }
+
+ /* Initialize the RFFT/RIFFT Function */
+ arm_rfft_init_q31(S->pRfft, S->N, 0U, 1U);
+
+ /* return the status of DCT4 Init function */
+ return (status);
+}
+
+/**
+ @} end of DCT4_IDCT4 group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q15.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q15.c index a4650da..ba26300 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q15.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q15.c @@ -1,381 +1,381 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_dct4_q15.c - * Description: Processing function of DCT4 & IDCT4 Q15 - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -/** - @addtogroup DCT4_IDCT4 - @{ - */ - -/** - @brief Processing function for the Q15 DCT4/IDCT4. - @param[in] S points to an instance of the Q15 DCT4 structure. - @param[in] pState points to state buffer. - @param[in,out] pInlineBuffer points to the in-place input and output buffer. - @return none - - @par Input an output formats - Internally inputs are downscaled in the RFFT process function to avoid overflows. - Number of bits downscaled, depends on the size of the transform. The input and output - formats for different DCT sizes and number of bits to upscale are mentioned in the table below: - - \image html dct4FormatsQ15Table.gif - */ - -void arm_dct4_q15( - const arm_dct4_instance_q15 * S, - q15_t * pState, - q15_t * pInlineBuffer) -{ - const q15_t *weights = S->pTwiddle; /* Pointer to the Weights table */ - const q15_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */ - q15_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */ - q15_t in; /* Temporary variable */ - uint32_t i; /* Loop counter */ - - - /* DCT4 computation involves DCT2 (which is calculated using RFFT) - * along with some pre-processing and post-processing. - * Computational procedure is explained as follows: - * (a) Pre-processing involves multiplying input with cos factor, - * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n)) - * where, - * r(n) -- output of preprocessing - * u(n) -- input to preprocessing(actual Source buffer) - * (b) Calculation of DCT2 using FFT is divided into three steps: - * Step1: Re-ordering of even and odd elements of input. - * Step2: Calculating FFT of the re-ordered input. - * Step3: Taking the real part of the product of FFT output and weights. - * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation: - * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) - * where, - * Y4 -- DCT4 output, Y2 -- DCT2 output - * (d) Multiplying the output with the normalizing factor sqrt(2/N). - */ - - /*-------- Pre-processing ------------*/ - /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */ - arm_mult_q15 (pInlineBuffer, cosFact, pInlineBuffer, S->N); - arm_shift_q15 (pInlineBuffer, 1, pInlineBuffer, S->N); - - /* ---------------------------------------------------------------- - * Step1: Re-ordering of even and odd elements as - * pState[i] = pInlineBuffer[2*i] and - * pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2 - ---------------------------------------------------------------------*/ - - /* pS1 initialized to pState */ - pS1 = pState; - - /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */ - pS2 = pState + (S->N - 1U); - - /* pbuff initialized to input buffer */ - pbuff = pInlineBuffer; - - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */ - i = S->Nby2 >> 2U; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - do - { - /* Re-ordering of even and odd elements */ - /* pState[i] = pInlineBuffer[2*i] */ - *pS1++ = *pbuff++; - /* pState[N-i-1] = pInlineBuffer[2*i+1] */ - *pS2-- = *pbuff++; - - *pS1++ = *pbuff++; - *pS2-- = *pbuff++; - - *pS1++ = *pbuff++; - *pS2-- = *pbuff++; - - *pS1++ = *pbuff++; - *pS2-- = *pbuff++; - - /* Decrement loop counter */ - i--; - } while (i > 0U); - - /* pbuff initialized to input buffer */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Initializing the loop counter to N/4 instead of N for loop unrolling */ - i = S->N >> 2U; - - /* Processing with loop unrolling 4 times as N is always multiple of 4. - * Compute 4 outputs at a time */ - do - { - /* Writing the re-ordered output back to inplace input buffer */ - *pbuff++ = *pS1++; - *pbuff++ = *pS1++; - *pbuff++ = *pS1++; - *pbuff++ = *pS1++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - - /* --------------------------------------------------------- - * Step2: Calculate RFFT for N-point input - * ---------------------------------------------------------- */ - /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ - arm_rfft_q15 (S->pRfft, pInlineBuffer, pState); - - /*---------------------------------------------------------------------- - * Step3: Multiply the FFT output with the weights. - *----------------------------------------------------------------------*/ - arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N); - - /* The output of complex multiplication is in 3.13 format. - * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */ - arm_shift_q15 (pState, 2, pState, S->N * 2); - - /* ----------- Post-processing ---------- */ - /* DCT-IV can be obtained from DCT-II by the equation, - * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) - * Hence, Y4(0) = Y2(0)/2 */ - /* Getting only real part from the output and Converting to DCT-IV */ - - /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */ - i = (S->N - 1U) >> 2U; - - /* pbuff initialized to input buffer. */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ - in = *pS1++ >> 1U; - /* input buffer acts as inplace, so output values are stored in the input itself. */ - *pbuff++ = in; - - /* pState pointer is incremented twice as the real values are located alternatively in the array */ - pS1++; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - do - { - /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ - /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ - in = *pS1++ - in; - *pbuff++ = in; - /* points to the next real value */ - pS1++; - - in = *pS1++ - in; - *pbuff++ = in; - pS1++; - - in = *pS1++ - in; - *pbuff++ = in; - pS1++; - - in = *pS1++ - in; - *pbuff++ = in; - pS1++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - i = (S->N - 1U) % 0x4U; - - while (i > 0U) - { - /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ - /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ - in = *pS1++ - in; - *pbuff++ = in; - - /* points to the next real value */ - pS1++; - - /* Decrement loop counter */ - i--; - } - - - /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ - - /* Initializing the loop counter to N/4 instead of N for loop unrolling */ - i = S->N >> 2U; - - /* pbuff initialized to the pInlineBuffer(now contains the output values) */ - pbuff = pInlineBuffer; - - /* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */ - do - { - /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */ - in = *pbuff; - *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); - - in = *pbuff; - *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); - - in = *pbuff; - *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); - - in = *pbuff; - *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); - - /* Decrement loop counter */ - i--; - } while (i > 0U); - - -#else - - /* Initializing the loop counter to N/2 */ - i = S->Nby2; - - do - { - /* Re-ordering of even and odd elements */ - /* pState[i] = pInlineBuffer[2*i] */ - *pS1++ = *pbuff++; - /* pState[N-i-1] = pInlineBuffer[2*i+1] */ - *pS2-- = *pbuff++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - /* pbuff initialized to input buffer */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Initializing the loop counter */ - i = S->N; - - do - { - /* Writing the re-ordered output back to inplace input buffer */ - *pbuff++ = *pS1++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - - /* --------------------------------------------------------- - * Step2: Calculate RFFT for N-point input - * ---------------------------------------------------------- */ - /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ - arm_rfft_q15 (S->pRfft, pInlineBuffer, pState); - - /*---------------------------------------------------------------------- - * Step3: Multiply the FFT output with the weights. - *----------------------------------------------------------------------*/ - arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N); - - /* The output of complex multiplication is in 3.13 format. - * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */ - arm_shift_q15 (pState, 2, pState, S->N * 2); - - /* ----------- Post-processing ---------- */ - /* DCT-IV can be obtained from DCT-II by the equation, - * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) - * Hence, Y4(0) = Y2(0)/2 */ - /* Getting only real part from the output and Converting to DCT-IV */ - - /* pbuff initialized to input buffer. */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ - in = *pS1++ >> 1U; - /* input buffer acts as inplace, so output values are stored in the input itself. */ - *pbuff++ = in; - - /* pState pointer is incremented twice as the real values are located alternatively in the array */ - pS1++; - - /* Initializing the loop counter */ - i = (S->N - 1U); - - do - { - /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ - /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ - in = *pS1++ - in; - *pbuff++ = in; - - /* points to the next real value */ - pS1++; - - /* Decrement loop counter */ - i--; - } while (i > 0U); - - /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ - - /* Initializing loop counter */ - i = S->N; - - /* pbuff initialized to the pInlineBuffer (now contains the output values) */ - pbuff = pInlineBuffer; - - do - { - /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */ - in = *pbuff; - *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); - - /* Decrement loop counter */ - i--; - - } while (i > 0U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - -} - -/** - @} end of DCT4_IDCT4 group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_dct4_q15.c
+ * Description: Processing function of DCT4 & IDCT4 Q15
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @addtogroup DCT4_IDCT4
+ @{
+ */
+
+/**
+ @brief Processing function for the Q15 DCT4/IDCT4.
+ @param[in] S points to an instance of the Q15 DCT4 structure.
+ @param[in] pState points to state buffer.
+ @param[in,out] pInlineBuffer points to the in-place input and output buffer.
+ @return none
+
+ @par Input an output formats
+ Internally inputs are downscaled in the RFFT process function to avoid overflows.
+ Number of bits downscaled, depends on the size of the transform. The input and output
+ formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
+
+ \image html dct4FormatsQ15Table.gif
+ */
+
+void arm_dct4_q15(
+ const arm_dct4_instance_q15 * S,
+ q15_t * pState,
+ q15_t * pInlineBuffer)
+{
+ const q15_t *weights = S->pTwiddle; /* Pointer to the Weights table */
+ const q15_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
+ q15_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
+ q15_t in; /* Temporary variable */
+ uint32_t i; /* Loop counter */
+
+
+ /* DCT4 computation involves DCT2 (which is calculated using RFFT)
+ * along with some pre-processing and post-processing.
+ * Computational procedure is explained as follows:
+ * (a) Pre-processing involves multiplying input with cos factor,
+ * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
+ * where,
+ * r(n) -- output of preprocessing
+ * u(n) -- input to preprocessing(actual Source buffer)
+ * (b) Calculation of DCT2 using FFT is divided into three steps:
+ * Step1: Re-ordering of even and odd elements of input.
+ * Step2: Calculating FFT of the re-ordered input.
+ * Step3: Taking the real part of the product of FFT output and weights.
+ * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
+ * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
+ * where,
+ * Y4 -- DCT4 output, Y2 -- DCT2 output
+ * (d) Multiplying the output with the normalizing factor sqrt(2/N).
+ */
+
+ /*-------- Pre-processing ------------*/
+ /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
+ arm_mult_q15 (pInlineBuffer, cosFact, pInlineBuffer, S->N);
+ arm_shift_q15 (pInlineBuffer, 1, pInlineBuffer, S->N);
+
+ /* ----------------------------------------------------------------
+ * Step1: Re-ordering of even and odd elements as
+ * pState[i] = pInlineBuffer[2*i] and
+ * pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
+ ---------------------------------------------------------------------*/
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
+ pS2 = pState + (S->N - 1U);
+
+ /* pbuff initialized to input buffer */
+ pbuff = pInlineBuffer;
+
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
+ i = S->Nby2 >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ do
+ {
+ /* Re-ordering of even and odd elements */
+ /* pState[i] = pInlineBuffer[2*i] */
+ *pS1++ = *pbuff++;
+ /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+ *pS2-- = *pbuff++;
+
+ *pS1++ = *pbuff++;
+ *pS2-- = *pbuff++;
+
+ *pS1++ = *pbuff++;
+ *pS2-- = *pbuff++;
+
+ *pS1++ = *pbuff++;
+ *pS2-- = *pbuff++;
+
+ /* Decrement loop counter */
+ i--;
+ } while (i > 0U);
+
+ /* pbuff initialized to input buffer */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+ i = S->N >> 2U;
+
+ /* Processing with loop unrolling 4 times as N is always multiple of 4.
+ * Compute 4 outputs at a time */
+ do
+ {
+ /* Writing the re-ordered output back to inplace input buffer */
+ *pbuff++ = *pS1++;
+ *pbuff++ = *pS1++;
+ *pbuff++ = *pS1++;
+ *pbuff++ = *pS1++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+
+ /* ---------------------------------------------------------
+ * Step2: Calculate RFFT for N-point input
+ * ---------------------------------------------------------- */
+ /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+ arm_rfft_q15 (S->pRfft, pInlineBuffer, pState);
+
+ /*----------------------------------------------------------------------
+ * Step3: Multiply the FFT output with the weights.
+ *----------------------------------------------------------------------*/
+ arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N);
+
+ /* The output of complex multiplication is in 3.13 format.
+ * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
+ arm_shift_q15 (pState, 2, pState, S->N * 2);
+
+ /* ----------- Post-processing ---------- */
+ /* DCT-IV can be obtained from DCT-II by the equation,
+ * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
+ * Hence, Y4(0) = Y2(0)/2 */
+ /* Getting only real part from the output and Converting to DCT-IV */
+
+ /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
+ i = (S->N - 1U) >> 2U;
+
+ /* pbuff initialized to input buffer. */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+ in = *pS1++ >> 1U;
+ /* input buffer acts as inplace, so output values are stored in the input itself. */
+ *pbuff++ = in;
+
+ /* pState pointer is incremented twice as the real values are located alternatively in the array */
+ pS1++;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ do
+ {
+ /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+ /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ /* points to the next real value */
+ pS1++;
+
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ pS1++;
+
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ pS1++;
+
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ pS1++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ i = (S->N - 1U) % 0x4U;
+
+ while (i > 0U)
+ {
+ /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+ /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+ in = *pS1++ - in;
+ *pbuff++ = in;
+
+ /* points to the next real value */
+ pS1++;
+
+ /* Decrement loop counter */
+ i--;
+ }
+
+
+ /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+ /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+ i = S->N >> 2U;
+
+ /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+ pbuff = pInlineBuffer;
+
+ /* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
+ do
+ {
+ /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+ in = *pbuff;
+ *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
+
+ in = *pbuff;
+ *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
+
+ in = *pbuff;
+ *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
+
+ in = *pbuff;
+ *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
+
+ /* Decrement loop counter */
+ i--;
+ } while (i > 0U);
+
+
+#else
+
+ /* Initializing the loop counter to N/2 */
+ i = S->Nby2;
+
+ do
+ {
+ /* Re-ordering of even and odd elements */
+ /* pState[i] = pInlineBuffer[2*i] */
+ *pS1++ = *pbuff++;
+ /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+ *pS2-- = *pbuff++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+ /* pbuff initialized to input buffer */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Initializing the loop counter */
+ i = S->N;
+
+ do
+ {
+ /* Writing the re-ordered output back to inplace input buffer */
+ *pbuff++ = *pS1++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+
+ /* ---------------------------------------------------------
+ * Step2: Calculate RFFT for N-point input
+ * ---------------------------------------------------------- */
+ /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+ arm_rfft_q15 (S->pRfft, pInlineBuffer, pState);
+
+ /*----------------------------------------------------------------------
+ * Step3: Multiply the FFT output with the weights.
+ *----------------------------------------------------------------------*/
+ arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N);
+
+ /* The output of complex multiplication is in 3.13 format.
+ * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
+ arm_shift_q15 (pState, 2, pState, S->N * 2);
+
+ /* ----------- Post-processing ---------- */
+ /* DCT-IV can be obtained from DCT-II by the equation,
+ * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
+ * Hence, Y4(0) = Y2(0)/2 */
+ /* Getting only real part from the output and Converting to DCT-IV */
+
+ /* pbuff initialized to input buffer. */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+ in = *pS1++ >> 1U;
+ /* input buffer acts as inplace, so output values are stored in the input itself. */
+ *pbuff++ = in;
+
+ /* pState pointer is incremented twice as the real values are located alternatively in the array */
+ pS1++;
+
+ /* Initializing the loop counter */
+ i = (S->N - 1U);
+
+ do
+ {
+ /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+ /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+ in = *pS1++ - in;
+ *pbuff++ = in;
+
+ /* points to the next real value */
+ pS1++;
+
+ /* Decrement loop counter */
+ i--;
+ } while (i > 0U);
+
+ /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+ /* Initializing loop counter */
+ i = S->N;
+
+ /* pbuff initialized to the pInlineBuffer (now contains the output values) */
+ pbuff = pInlineBuffer;
+
+ do
+ {
+ /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+ in = *pbuff;
+ *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
+
+ /* Decrement loop counter */
+ i--;
+
+ } while (i > 0U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+}
+
+/**
+ @} end of DCT4_IDCT4 group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q31.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q31.c index 6cbccff..5757083 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q31.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q31.c @@ -1,383 +1,383 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_dct4_q31.c - * Description: Processing function of DCT4 & IDCT4 Q31 - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -/** - @addtogroup DCT4_IDCT4 - @{ - */ - -/** - @brief Processing function for the Q31 DCT4/IDCT4. - @param[in] S points to an instance of the Q31 DCT4 structure. - @param[in] pState points to state buffer. - @param[in,out] pInlineBuffer points to the in-place input and output buffer. - @return none - - @par Input an output formats - Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process, - as the conversion from DCT2 to DCT4 involves one subtraction. - Internally inputs are downscaled in the RFFT process function to avoid overflows. - Number of bits downscaled, depends on the size of the transform. - The input and output formats for different DCT sizes and number of bits to upscale are - mentioned in the table below: - - \image html dct4FormatsQ31Table.gif - */ - -void arm_dct4_q31( - const arm_dct4_instance_q31 * S, - q31_t * pState, - q31_t * pInlineBuffer) -{ - const q31_t *weights = S->pTwiddle; /* Pointer to the Weights table */ - const q31_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */ - q31_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */ - q31_t in; /* Temporary variable */ - uint32_t i; /* Loop counter */ - - - /* DCT4 computation involves DCT2 (which is calculated using RFFT) - * along with some pre-processing and post-processing. - * Computational procedure is explained as follows: - * (a) Pre-processing involves multiplying input with cos factor, - * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n)) - * where, - * r(n) -- output of preprocessing - * u(n) -- input to preprocessing(actual Source buffer) - * (b) Calculation of DCT2 using FFT is divided into three steps: - * Step1: Re-ordering of even and odd elements of input. - * Step2: Calculating FFT of the re-ordered input. - * Step3: Taking the real part of the product of FFT output and weights. - * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation: - * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) - * where, - * Y4 -- DCT4 output, Y2 -- DCT2 output - * (d) Multiplying the output with the normalizing factor sqrt(2/N). - */ - - /*-------- Pre-processing ------------*/ - /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */ - arm_mult_q31 (pInlineBuffer, cosFact, pInlineBuffer, S->N); - arm_shift_q31 (pInlineBuffer, 1, pInlineBuffer, S->N); - - /* ---------------------------------------------------------------- - * Step1: Re-ordering of even and odd elements as - * pState[i] = pInlineBuffer[2*i] and - * pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2 - ---------------------------------------------------------------------*/ - - /* pS1 initialized to pState */ - pS1 = pState; - - /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */ - pS2 = pState + (S->N - 1U); - - /* pbuff initialized to input buffer */ - pbuff = pInlineBuffer; - - -#if defined (ARM_MATH_LOOPUNROLL) - - /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */ - i = S->Nby2 >> 2U; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - do - { - /* Re-ordering of even and odd elements */ - /* pState[i] = pInlineBuffer[2*i] */ - *pS1++ = *pbuff++; - /* pState[N-i-1] = pInlineBuffer[2*i+1] */ - *pS2-- = *pbuff++; - - *pS1++ = *pbuff++; - *pS2-- = *pbuff++; - - *pS1++ = *pbuff++; - *pS2-- = *pbuff++; - - *pS1++ = *pbuff++; - *pS2-- = *pbuff++; - - /* Decrement loop counter */ - i--; - } while (i > 0U); - - /* pbuff initialized to input buffer */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Initializing the loop counter to N/4 instead of N for loop unrolling */ - i = S->N >> 2U; - - /* Processing with loop unrolling 4 times as N is always multiple of 4. - * Compute 4 outputs at a time */ - do - { - /* Writing the re-ordered output back to inplace input buffer */ - *pbuff++ = *pS1++; - *pbuff++ = *pS1++; - *pbuff++ = *pS1++; - *pbuff++ = *pS1++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - - /* --------------------------------------------------------- - * Step2: Calculate RFFT for N-point input - * ---------------------------------------------------------- */ - /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ - arm_rfft_q31 (S->pRfft, pInlineBuffer, pState); - - /*---------------------------------------------------------------------- - * Step3: Multiply the FFT output with the weights. - *----------------------------------------------------------------------*/ - arm_cmplx_mult_cmplx_q31 (pState, weights, pState, S->N); - - /* The output of complex multiplication is in 3.29 format. - * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */ - arm_shift_q31 (pState, 2, pState, S->N * 2); - - /* ----------- Post-processing ---------- */ - /* DCT-IV can be obtained from DCT-II by the equation, - * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) - * Hence, Y4(0) = Y2(0)/2 */ - /* Getting only real part from the output and Converting to DCT-IV */ - - /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */ - i = (S->N - 1U) >> 2U; - - /* pbuff initialized to input buffer. */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ - in = *pS1++ >> 1U; - /* input buffer acts as inplace, so output values are stored in the input itself. */ - *pbuff++ = in; - - /* pState pointer is incremented twice as the real values are located alternatively in the array */ - pS1++; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - do - { - /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ - /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ - in = *pS1++ - in; - *pbuff++ = in; - /* points to the next real value */ - pS1++; - - in = *pS1++ - in; - *pbuff++ = in; - pS1++; - - in = *pS1++ - in; - *pbuff++ = in; - pS1++; - - in = *pS1++ - in; - *pbuff++ = in; - pS1++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - i = (S->N - 1U) % 0x4U; - - while (i > 0U) - { - /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ - /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ - in = *pS1++ - in; - *pbuff++ = in; - - /* points to the next real value */ - pS1++; - - /* Decrement loop counter */ - i--; - } - - - /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ - - /* Initializing the loop counter to N/4 instead of N for loop unrolling */ - i = S->N >> 2U; - - /* pbuff initialized to the pInlineBuffer(now contains the output values) */ - pbuff = pInlineBuffer; - - /* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */ - do - { - /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */ - in = *pbuff; - *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31)); - - in = *pbuff; - *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31)); - - in = *pbuff; - *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31)); - - in = *pbuff; - *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31)); - - /* Decrement loop counter */ - i--; - } while (i > 0U); - - -#else - - /* Initializing the loop counter to N/2 */ - i = S->Nby2; - - do - { - /* Re-ordering of even and odd elements */ - /* pState[i] = pInlineBuffer[2*i] */ - *pS1++ = *pbuff++; - /* pState[N-i-1] = pInlineBuffer[2*i+1] */ - *pS2-- = *pbuff++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - /* pbuff initialized to input buffer */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Initializing the loop counter */ - i = S->N; - - do - { - /* Writing the re-ordered output back to inplace input buffer */ - *pbuff++ = *pS1++; - - /* Decrement the loop counter */ - i--; - } while (i > 0U); - - - /* --------------------------------------------------------- - * Step2: Calculate RFFT for N-point input - * ---------------------------------------------------------- */ - /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ - arm_rfft_q31 (S->pRfft, pInlineBuffer, pState); - - /*---------------------------------------------------------------------- - * Step3: Multiply the FFT output with the weights. - *----------------------------------------------------------------------*/ - arm_cmplx_mult_cmplx_q31 (pState, weights, pState, S->N); - - /* The output of complex multiplication is in 3.29 format. - * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */ - arm_shift_q31(pState, 2, pState, S->N * 2); - - /* ----------- Post-processing ---------- */ - /* DCT-IV can be obtained from DCT-II by the equation, - * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) - * Hence, Y4(0) = Y2(0)/2 */ - /* Getting only real part from the output and Converting to DCT-IV */ - - /* pbuff initialized to input buffer. */ - pbuff = pInlineBuffer; - - /* pS1 initialized to pState */ - pS1 = pState; - - /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ - in = *pS1++ >> 1U; - /* input buffer acts as inplace, so output values are stored in the input itself. */ - *pbuff++ = in; - - /* pState pointer is incremented twice as the real values are located alternatively in the array */ - pS1++; - - /* Initializing the loop counter */ - i = (S->N - 1U); - - while (i > 0U) - { - /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ - /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ - in = *pS1++ - in; - *pbuff++ = in; - - /* points to the next real value */ - pS1++; - - /* Decrement loop counter */ - i--; - } - - /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ - - /* Initializing loop counter */ - i = S->N; - - /* pbuff initialized to the pInlineBuffer (now contains the output values) */ - pbuff = pInlineBuffer; - - do - { - /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */ - in = *pbuff; - *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31)); - - /* Decrement loop counter */ - i--; - } while (i > 0U); - -#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ - -} - -/** - @} end of DCT4_IDCT4 group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_dct4_q31.c
+ * Description: Processing function of DCT4 & IDCT4 Q31
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ @addtogroup DCT4_IDCT4
+ @{
+ */
+
+/**
+ @brief Processing function for the Q31 DCT4/IDCT4.
+ @param[in] S points to an instance of the Q31 DCT4 structure.
+ @param[in] pState points to state buffer.
+ @param[in,out] pInlineBuffer points to the in-place input and output buffer.
+ @return none
+
+ @par Input an output formats
+ Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process,
+ as the conversion from DCT2 to DCT4 involves one subtraction.
+ Internally inputs are downscaled in the RFFT process function to avoid overflows.
+ Number of bits downscaled, depends on the size of the transform.
+ The input and output formats for different DCT sizes and number of bits to upscale are
+ mentioned in the table below:
+
+ \image html dct4FormatsQ31Table.gif
+ */
+
+void arm_dct4_q31(
+ const arm_dct4_instance_q31 * S,
+ q31_t * pState,
+ q31_t * pInlineBuffer)
+{
+ const q31_t *weights = S->pTwiddle; /* Pointer to the Weights table */
+ const q31_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
+ q31_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
+ q31_t in; /* Temporary variable */
+ uint32_t i; /* Loop counter */
+
+
+ /* DCT4 computation involves DCT2 (which is calculated using RFFT)
+ * along with some pre-processing and post-processing.
+ * Computational procedure is explained as follows:
+ * (a) Pre-processing involves multiplying input with cos factor,
+ * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
+ * where,
+ * r(n) -- output of preprocessing
+ * u(n) -- input to preprocessing(actual Source buffer)
+ * (b) Calculation of DCT2 using FFT is divided into three steps:
+ * Step1: Re-ordering of even and odd elements of input.
+ * Step2: Calculating FFT of the re-ordered input.
+ * Step3: Taking the real part of the product of FFT output and weights.
+ * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
+ * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
+ * where,
+ * Y4 -- DCT4 output, Y2 -- DCT2 output
+ * (d) Multiplying the output with the normalizing factor sqrt(2/N).
+ */
+
+ /*-------- Pre-processing ------------*/
+ /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
+ arm_mult_q31 (pInlineBuffer, cosFact, pInlineBuffer, S->N);
+ arm_shift_q31 (pInlineBuffer, 1, pInlineBuffer, S->N);
+
+ /* ----------------------------------------------------------------
+ * Step1: Re-ordering of even and odd elements as
+ * pState[i] = pInlineBuffer[2*i] and
+ * pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
+ ---------------------------------------------------------------------*/
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
+ pS2 = pState + (S->N - 1U);
+
+ /* pbuff initialized to input buffer */
+ pbuff = pInlineBuffer;
+
+
+#if defined (ARM_MATH_LOOPUNROLL)
+
+ /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
+ i = S->Nby2 >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ do
+ {
+ /* Re-ordering of even and odd elements */
+ /* pState[i] = pInlineBuffer[2*i] */
+ *pS1++ = *pbuff++;
+ /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+ *pS2-- = *pbuff++;
+
+ *pS1++ = *pbuff++;
+ *pS2-- = *pbuff++;
+
+ *pS1++ = *pbuff++;
+ *pS2-- = *pbuff++;
+
+ *pS1++ = *pbuff++;
+ *pS2-- = *pbuff++;
+
+ /* Decrement loop counter */
+ i--;
+ } while (i > 0U);
+
+ /* pbuff initialized to input buffer */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+ i = S->N >> 2U;
+
+ /* Processing with loop unrolling 4 times as N is always multiple of 4.
+ * Compute 4 outputs at a time */
+ do
+ {
+ /* Writing the re-ordered output back to inplace input buffer */
+ *pbuff++ = *pS1++;
+ *pbuff++ = *pS1++;
+ *pbuff++ = *pS1++;
+ *pbuff++ = *pS1++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+
+ /* ---------------------------------------------------------
+ * Step2: Calculate RFFT for N-point input
+ * ---------------------------------------------------------- */
+ /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+ arm_rfft_q31 (S->pRfft, pInlineBuffer, pState);
+
+ /*----------------------------------------------------------------------
+ * Step3: Multiply the FFT output with the weights.
+ *----------------------------------------------------------------------*/
+ arm_cmplx_mult_cmplx_q31 (pState, weights, pState, S->N);
+
+ /* The output of complex multiplication is in 3.29 format.
+ * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
+ arm_shift_q31 (pState, 2, pState, S->N * 2);
+
+ /* ----------- Post-processing ---------- */
+ /* DCT-IV can be obtained from DCT-II by the equation,
+ * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
+ * Hence, Y4(0) = Y2(0)/2 */
+ /* Getting only real part from the output and Converting to DCT-IV */
+
+ /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
+ i = (S->N - 1U) >> 2U;
+
+ /* pbuff initialized to input buffer. */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+ in = *pS1++ >> 1U;
+ /* input buffer acts as inplace, so output values are stored in the input itself. */
+ *pbuff++ = in;
+
+ /* pState pointer is incremented twice as the real values are located alternatively in the array */
+ pS1++;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ do
+ {
+ /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+ /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ /* points to the next real value */
+ pS1++;
+
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ pS1++;
+
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ pS1++;
+
+ in = *pS1++ - in;
+ *pbuff++ = in;
+ pS1++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ i = (S->N - 1U) % 0x4U;
+
+ while (i > 0U)
+ {
+ /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+ /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+ in = *pS1++ - in;
+ *pbuff++ = in;
+
+ /* points to the next real value */
+ pS1++;
+
+ /* Decrement loop counter */
+ i--;
+ }
+
+
+ /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+ /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+ i = S->N >> 2U;
+
+ /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+ pbuff = pInlineBuffer;
+
+ /* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
+ do
+ {
+ /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+ in = *pbuff;
+ *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
+
+ in = *pbuff;
+ *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
+
+ in = *pbuff;
+ *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
+
+ in = *pbuff;
+ *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
+
+ /* Decrement loop counter */
+ i--;
+ } while (i > 0U);
+
+
+#else
+
+ /* Initializing the loop counter to N/2 */
+ i = S->Nby2;
+
+ do
+ {
+ /* Re-ordering of even and odd elements */
+ /* pState[i] = pInlineBuffer[2*i] */
+ *pS1++ = *pbuff++;
+ /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+ *pS2-- = *pbuff++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+ /* pbuff initialized to input buffer */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Initializing the loop counter */
+ i = S->N;
+
+ do
+ {
+ /* Writing the re-ordered output back to inplace input buffer */
+ *pbuff++ = *pS1++;
+
+ /* Decrement the loop counter */
+ i--;
+ } while (i > 0U);
+
+
+ /* ---------------------------------------------------------
+ * Step2: Calculate RFFT for N-point input
+ * ---------------------------------------------------------- */
+ /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+ arm_rfft_q31 (S->pRfft, pInlineBuffer, pState);
+
+ /*----------------------------------------------------------------------
+ * Step3: Multiply the FFT output with the weights.
+ *----------------------------------------------------------------------*/
+ arm_cmplx_mult_cmplx_q31 (pState, weights, pState, S->N);
+
+ /* The output of complex multiplication is in 3.29 format.
+ * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
+ arm_shift_q31(pState, 2, pState, S->N * 2);
+
+ /* ----------- Post-processing ---------- */
+ /* DCT-IV can be obtained from DCT-II by the equation,
+ * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
+ * Hence, Y4(0) = Y2(0)/2 */
+ /* Getting only real part from the output and Converting to DCT-IV */
+
+ /* pbuff initialized to input buffer. */
+ pbuff = pInlineBuffer;
+
+ /* pS1 initialized to pState */
+ pS1 = pState;
+
+ /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+ in = *pS1++ >> 1U;
+ /* input buffer acts as inplace, so output values are stored in the input itself. */
+ *pbuff++ = in;
+
+ /* pState pointer is incremented twice as the real values are located alternatively in the array */
+ pS1++;
+
+ /* Initializing the loop counter */
+ i = (S->N - 1U);
+
+ while (i > 0U)
+ {
+ /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+ /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+ in = *pS1++ - in;
+ *pbuff++ = in;
+
+ /* points to the next real value */
+ pS1++;
+
+ /* Decrement loop counter */
+ i--;
+ }
+
+ /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+ /* Initializing loop counter */
+ i = S->N;
+
+ /* pbuff initialized to the pInlineBuffer (now contains the output values) */
+ pbuff = pInlineBuffer;
+
+ do
+ {
+ /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+ in = *pbuff;
+ *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
+
+ /* Decrement loop counter */
+ i--;
+ } while (i > 0U);
+
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
+
+}
+
+/**
+ @} end of DCT4_IDCT4 group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_f32.c index 8844b73..2ad336d 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_f32.c @@ -1,318 +1,309 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rfft_f32.c - * Description: RFFT & RIFFT Floating point process function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -/* ---------------------------------------------------------------------- - * Internal functions prototypes - * -------------------------------------------------------------------- */ - -extern void arm_radix4_butterfly_f32( - float32_t * pSrc, - uint16_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier); - -extern void arm_radix4_butterfly_inverse_f32( - float32_t * pSrc, - uint16_t fftLen, - const float32_t * pCoef, - uint16_t twidCoefModifier, - float32_t onebyfftLen); - -extern void arm_bitreversal_f32( - float32_t * pSrc, - uint16_t fftSize, - uint16_t bitRevFactor, - const uint16_t * pBitRevTab); - -void arm_split_rfft_f32( - float32_t * pSrc, - uint32_t fftLen, - const float32_t * pATable, - const float32_t * pBTable, - float32_t * pDst, - uint32_t modifier); - -void arm_split_rifft_f32( - float32_t * pSrc, - uint32_t fftLen, - const float32_t * pATable, - const float32_t * pBTable, - float32_t * pDst, - uint32_t modifier); - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup RealFFT - @{ - */ - -/** - @brief Processing function for the floating-point RFFT/RIFFT. - Source buffer is modified by this function. - - @deprecated Do not use this function. It has been superceded by \ref arm_rfft_fast_f32 and will be removed in the future. - @param[in] S points to an instance of the floating-point RFFT/RIFFT structure - @param[in] pSrc points to the input buffer - @param[out] pDst points to the output buffer - @return none - - @par - For the RIFFT, the source buffer must at least have length - fftLenReal + 2. - The last two elements must be equal to what would be generated - by the RFFT: - (pSrc[0] - pSrc[1]) and 0.0f - */ - -void arm_rfft_f32( - const arm_rfft_instance_f32 * S, - float32_t * pSrc, - float32_t * pDst) -{ - const arm_cfft_radix4_instance_f32 *S_CFFT = S->pCfft; - - /* Calculation of Real IFFT of input */ - if (S->ifftFlagR == 1U) - { - /* Real IFFT core process */ - arm_split_rifft_f32 (pSrc, S->fftLenBy2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier); - - - /* Complex radix-4 IFFT process */ - arm_radix4_butterfly_inverse_f32 (pDst, S_CFFT->fftLen, S_CFFT->pTwiddle, S_CFFT->twidCoefModifier, S_CFFT->onebyfftLen); - - /* Bit reversal process */ - if (S->bitReverseFlagR == 1U) - { - arm_bitreversal_f32 (pDst, S_CFFT->fftLen, S_CFFT->bitRevFactor, S_CFFT->pBitRevTable); - } - } - else - { - /* Calculation of RFFT of input */ - - /* Complex radix-4 FFT process */ - arm_radix4_butterfly_f32 (pSrc, S_CFFT->fftLen, S_CFFT->pTwiddle, S_CFFT->twidCoefModifier); - - /* Bit reversal process */ - if (S->bitReverseFlagR == 1U) - { - arm_bitreversal_f32 (pSrc, S_CFFT->fftLen, S_CFFT->bitRevFactor, S_CFFT->pBitRevTable); - } - - /* Real FFT core process */ - arm_split_rfft_f32 (pSrc, S->fftLenBy2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier); - } - -} - -/** - @} end of RealFFT group - */ - -/** - @brief Core Real FFT process - @param[in] pSrc points to input buffer - @param[in] fftLen length of FFT - @param[in] pATable points to twiddle Coef A buffer - @param[in] pBTable points to twiddle Coef B buffer - @param[out] pDst points to output buffer - @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table - @return none - */ - -void arm_split_rfft_f32( - float32_t * pSrc, - uint32_t fftLen, - const float32_t * pATable, - const float32_t * pBTable, - float32_t * pDst, - uint32_t modifier) -{ - uint32_t i; /* Loop Counter */ - float32_t outR, outI; /* Temporary variables for output */ - const float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */ - float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */ - float32_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4U * fftLen) - 1U]; /* temp pointers for output buffer */ - float32_t *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2U * fftLen) - 1U]; /* temp pointers for input buffer */ - - /* Init coefficient pointers */ - pCoefA = &pATable[modifier * 2]; - pCoefB = &pBTable[modifier * 2]; - - i = fftLen - 1U; - - while (i > 0U) - { - /* - outR = ( pSrc[2 * i] * pATable[2 * i] - - pSrc[2 * i + 1] * pATable[2 * i + 1] - + pSrc[2 * n - 2 * i] * pBTable[2 * i] - + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]); - - outI = ( pIn[2 * i + 1] * pATable[2 * i] - + pIn[2 * i] * pATable[2 * i + 1] - + pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); - */ - - /* read pATable[2 * i] */ - CoefA1 = *pCoefA++; - /* pATable[2 * i + 1] */ - CoefA2 = *pCoefA; - - /* pSrc[2 * i] * pATable[2 * i] */ - outR = *pSrc1 * CoefA1; - /* pSrc[2 * i] * CoefA2 */ - outI = *pSrc1++ * CoefA2; - - /* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */ - outR -= (*pSrc1 + *pSrc2) * CoefA2; - /* pSrc[2 * i + 1] * CoefA1 */ - outI += *pSrc1++ * CoefA1; - - CoefB1 = *pCoefB; - - /* pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */ - outI -= *pSrc2-- * CoefB1; - /* pSrc[2 * fftLen - 2 * i] * CoefA2 */ - outI -= *pSrc2 * CoefA2; - - /* pSrc[2 * fftLen - 2 * i] * CoefB1 */ - outR += *pSrc2-- * CoefB1; - - /* write output */ - *pDst1++ = outR; - *pDst1++ = outI; - - /* write complex conjugate output */ - *pDst2-- = -outI; - *pDst2-- = outR; - - /* update coefficient pointer */ - pCoefB = pCoefB + (modifier * 2U); - pCoefA = pCoefA + ((modifier * 2U) - 1U); - - i--; - - } - - pDst[2U * fftLen] = pSrc[0] - pSrc[1]; - pDst[(2U * fftLen) + 1U] = 0.0f; - - pDst[0] = pSrc[0] + pSrc[1]; - pDst[1] = 0.0f; - -} - - -/** - @brief Core Real IFFT process - @param[in] pSrc points to input buffer - @param[in] fftLen length of FFT - @param[in] pATable points to twiddle Coef A buffer - @param[in] pBTable points to twiddle Coef B buffer - @param[out] pDst points to output buffer - @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table - @return none - */ - -void arm_split_rifft_f32( - float32_t * pSrc, - uint32_t fftLen, - const float32_t * pATable, - const float32_t * pBTable, - float32_t * pDst, - uint32_t modifier) -{ - float32_t outR, outI; /* Temporary variables for output */ - const float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */ - float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */ - float32_t *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2U * fftLen) + 1U]; - - pCoefA = &pATable[0]; - pCoefB = &pBTable[0]; - - while (fftLen > 0U) - { - /* - outR = ( pIn[2 * i] * pATable[2 * i] - + pIn[2 * i + 1] * pATable[2 * i + 1] - + pIn[2 * n - 2 * i] * pBTable[2 * i] - - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]); - - outI = ( pIn[2 * i + 1] * pATable[2 * i] - - pIn[2 * i] * pATable[2 * i + 1] - - pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); - */ - - CoefA1 = *pCoefA++; - CoefA2 = *pCoefA; - - /* outR = (pSrc[2 * i] * CoefA1 */ - outR = *pSrc1 * CoefA1; - - /* - pSrc[2 * i] * CoefA2 */ - outI = -(*pSrc1++) * CoefA2; - - /* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */ - outR += (*pSrc1 + *pSrc2) * CoefA2; - - /* pSrc[2 * i + 1] * CoefA1 */ - outI += (*pSrc1++) * CoefA1; - - CoefB1 = *pCoefB; - - /* - pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */ - outI -= *pSrc2-- * CoefB1; - - /* pSrc[2 * fftLen - 2 * i] * CoefB1 */ - outR += *pSrc2 * CoefB1; - - /* pSrc[2 * fftLen - 2 * i] * CoefA2 */ - outI += *pSrc2-- * CoefA2; - - /* write output */ - *pDst++ = outR; - *pDst++ = outI; - - /* update coefficient pointer */ - pCoefB = pCoefB + (modifier * 2); - pCoefA = pCoefA + (modifier * 2 - 1); - - /* Decrement loop count */ - fftLen--; - } - -} +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rfft_f32.c
+ * Description: RFFT & RIFFT Floating point process function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/* ----------------------------------------------------------------------
+ * Internal functions prototypes
+ * -------------------------------------------------------------------- */
+
+extern void arm_radix4_butterfly_f32(
+ float32_t * pSrc,
+ uint16_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier);
+
+extern void arm_radix4_butterfly_inverse_f32(
+ float32_t * pSrc,
+ uint16_t fftLen,
+ const float32_t * pCoef,
+ uint16_t twidCoefModifier,
+ float32_t onebyfftLen);
+
+extern void arm_bitreversal_f32(
+ float32_t * pSrc,
+ uint16_t fftSize,
+ uint16_t bitRevFactor,
+ const uint16_t * pBitRevTab);
+
+void arm_split_rfft_f32(
+ float32_t * pSrc,
+ uint32_t fftLen,
+ const float32_t * pATable,
+ const float32_t * pBTable,
+ float32_t * pDst,
+ uint32_t modifier);
+
+void arm_split_rifft_f32(
+ float32_t * pSrc,
+ uint32_t fftLen,
+ const float32_t * pATable,
+ const float32_t * pBTable,
+ float32_t * pDst,
+ uint32_t modifier);
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup RealFFT
+ @{
+ */
+
+/**
+ @brief Processing function for the floating-point RFFT/RIFFT.
+ @deprecated Do not use this function. It has been superceded by \ref arm_rfft_fast_f32 and will be removed in the future.
+ @param[in] S points to an instance of the floating-point RFFT/RIFFT structure
+ @param[in] pSrc points to the input buffer
+ @param[out] pDst points to the output buffer
+ @return none
+ */
+
+void arm_rfft_f32(
+ const arm_rfft_instance_f32 * S,
+ float32_t * pSrc,
+ float32_t * pDst)
+{
+ const arm_cfft_radix4_instance_f32 *S_CFFT = S->pCfft;
+
+ /* Calculation of Real IFFT of input */
+ if (S->ifftFlagR == 1U)
+ {
+ /* Real IFFT core process */
+ arm_split_rifft_f32 (pSrc, S->fftLenBy2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+
+
+ /* Complex radix-4 IFFT process */
+ arm_radix4_butterfly_inverse_f32 (pDst, S_CFFT->fftLen, S_CFFT->pTwiddle, S_CFFT->twidCoefModifier, S_CFFT->onebyfftLen);
+
+ /* Bit reversal process */
+ if (S->bitReverseFlagR == 1U)
+ {
+ arm_bitreversal_f32 (pDst, S_CFFT->fftLen, S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
+ }
+ }
+ else
+ {
+ /* Calculation of RFFT of input */
+
+ /* Complex radix-4 FFT process */
+ arm_radix4_butterfly_f32 (pSrc, S_CFFT->fftLen, S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
+
+ /* Bit reversal process */
+ if (S->bitReverseFlagR == 1U)
+ {
+ arm_bitreversal_f32 (pSrc, S_CFFT->fftLen, S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
+ }
+
+ /* Real FFT core process */
+ arm_split_rfft_f32 (pSrc, S->fftLenBy2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+ }
+
+}
+
+/**
+ @} end of RealFFT group
+ */
+
+/**
+ @brief Core Real FFT process
+ @param[in] pSrc points to input buffer
+ @param[in] fftLen length of FFT
+ @param[in] pATable points to twiddle Coef A buffer
+ @param[in] pBTable points to twiddle Coef B buffer
+ @param[out] pDst points to output buffer
+ @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+ @return none
+ */
+
+void arm_split_rfft_f32(
+ float32_t * pSrc,
+ uint32_t fftLen,
+ const float32_t * pATable,
+ const float32_t * pBTable,
+ float32_t * pDst,
+ uint32_t modifier)
+{
+ uint32_t i; /* Loop Counter */
+ float32_t outR, outI; /* Temporary variables for output */
+ const float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
+ float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
+ float32_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4U * fftLen) - 1U]; /* temp pointers for output buffer */
+ float32_t *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2U * fftLen) - 1U]; /* temp pointers for input buffer */
+
+ /* Init coefficient pointers */
+ pCoefA = &pATable[modifier * 2];
+ pCoefB = &pBTable[modifier * 2];
+
+ i = fftLen - 1U;
+
+ while (i > 0U)
+ {
+ /*
+ outR = ( pSrc[2 * i] * pATable[2 * i]
+ - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ + pSrc[2 * n - 2 * i] * pBTable[2 * i]
+ + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+
+ outI = ( pIn[2 * i + 1] * pATable[2 * i]
+ + pIn[2 * i] * pATable[2 * i + 1]
+ + pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
+ - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+ */
+
+ /* read pATable[2 * i] */
+ CoefA1 = *pCoefA++;
+ /* pATable[2 * i + 1] */
+ CoefA2 = *pCoefA;
+
+ /* pSrc[2 * i] * pATable[2 * i] */
+ outR = *pSrc1 * CoefA1;
+ /* pSrc[2 * i] * CoefA2 */
+ outI = *pSrc1++ * CoefA2;
+
+ /* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
+ outR -= (*pSrc1 + *pSrc2) * CoefA2;
+ /* pSrc[2 * i + 1] * CoefA1 */
+ outI += *pSrc1++ * CoefA1;
+
+ CoefB1 = *pCoefB;
+
+ /* pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
+ outI -= *pSrc2-- * CoefB1;
+ /* pSrc[2 * fftLen - 2 * i] * CoefA2 */
+ outI -= *pSrc2 * CoefA2;
+
+ /* pSrc[2 * fftLen - 2 * i] * CoefB1 */
+ outR += *pSrc2-- * CoefB1;
+
+ /* write output */
+ *pDst1++ = outR;
+ *pDst1++ = outI;
+
+ /* write complex conjugate output */
+ *pDst2-- = -outI;
+ *pDst2-- = outR;
+
+ /* update coefficient pointer */
+ pCoefB = pCoefB + (modifier * 2U);
+ pCoefA = pCoefA + ((modifier * 2U) - 1U);
+
+ i--;
+
+ }
+
+ pDst[2U * fftLen] = pSrc[0] - pSrc[1];
+ pDst[(2U * fftLen) + 1U] = 0.0f;
+
+ pDst[0] = pSrc[0] + pSrc[1];
+ pDst[1] = 0.0f;
+
+}
+
+
+/**
+ @brief Core Real IFFT process
+ @param[in] pSrc points to input buffer
+ @param[in] fftLen length of FFT
+ @param[in] pATable points to twiddle Coef A buffer
+ @param[in] pBTable points to twiddle Coef B buffer
+ @param[out] pDst points to output buffer
+ @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+ @return none
+ */
+
+void arm_split_rifft_f32(
+ float32_t * pSrc,
+ uint32_t fftLen,
+ const float32_t * pATable,
+ const float32_t * pBTable,
+ float32_t * pDst,
+ uint32_t modifier)
+{
+ float32_t outR, outI; /* Temporary variables for output */
+ const float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
+ float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
+ float32_t *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2U * fftLen) + 1U];
+
+ pCoefA = &pATable[0];
+ pCoefB = &pBTable[0];
+
+ while (fftLen > 0U)
+ {
+ /*
+ outR = ( pIn[2 * i] * pATable[2 * i]
+ + pIn[2 * i + 1] * pATable[2 * i + 1]
+ + pIn[2 * n - 2 * i] * pBTable[2 * i]
+ - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+
+ outI = ( pIn[2 * i + 1] * pATable[2 * i]
+ - pIn[2 * i] * pATable[2 * i + 1]
+ - pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
+ - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+ */
+
+ CoefA1 = *pCoefA++;
+ CoefA2 = *pCoefA;
+
+ /* outR = (pSrc[2 * i] * CoefA1 */
+ outR = *pSrc1 * CoefA1;
+
+ /* - pSrc[2 * i] * CoefA2 */
+ outI = -(*pSrc1++) * CoefA2;
+
+ /* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
+ outR += (*pSrc1 + *pSrc2) * CoefA2;
+
+ /* pSrc[2 * i + 1] * CoefA1 */
+ outI += (*pSrc1++) * CoefA1;
+
+ CoefB1 = *pCoefB;
+
+ /* - pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
+ outI -= *pSrc2-- * CoefB1;
+
+ /* pSrc[2 * fftLen - 2 * i] * CoefB1 */
+ outR += *pSrc2 * CoefB1;
+
+ /* pSrc[2 * fftLen - 2 * i] * CoefA2 */
+ outI += *pSrc2-- * CoefA2;
+
+ /* write output */
+ *pDst++ = outR;
+ *pDst++ = outI;
+
+ /* update coefficient pointer */
+ pCoefB = pCoefB + (modifier * 2);
+ pCoefA = pCoefA + (modifier * 2 - 1);
+
+ /* Decrement loop count */
+ fftLen--;
+ }
+
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f32.c index 6712504..ebaa7d9 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_f32.c @@ -1,603 +1,320 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rfft_fast_f32.c - * Description: RFFT & RIFFT Floating point process function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) -void stage_rfft_f32( - const arm_rfft_fast_instance_f32 * S, - float32_t * p, - float32_t * pOut) -{ - int32_t k; /* Loop Counter */ - float32_t twR, twI; /* RFFT Twiddle coefficients */ - const float32_t * pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */ - float32_t *pA = p; /* increasing pointer */ - float32_t *pB = p; /* decreasing pointer */ - float32_t xAR, xAI, xBR, xBI; /* temporary variables */ - float32_t t1a, t1b; /* temporary variables */ - float32_t p0, p1, p2, p3; /* temporary variables */ - - float32x4x2_t tw,xA,xB; - float32x4x2_t tmp1, tmp2, res; - - uint32x4_t vecStridesFwd, vecStridesBkwd; - - vecStridesFwd = vidupq_u32((uint32_t)0, 2); - vecStridesBkwd = -vecStridesFwd; - - int blockCnt; - - - k = (S->Sint).fftLen - 1; - - /* Pack first and last sample of the frequency domain together */ - - xBR = pB[0]; - xBI = pB[1]; - xAR = pA[0]; - xAI = pA[1]; - - twR = *pCoeff++ ; - twI = *pCoeff++ ; - - // U1 = XA(1) + XB(1); % It is real - t1a = xBR + xAR ; - - // U2 = XB(1) - XA(1); % It is imaginary - t1b = xBI + xAI ; - - // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI); - // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI); - *pOut++ = 0.5f * ( t1a + t1b ); - *pOut++ = 0.5f * ( t1a - t1b ); - - // XA(1) = 1/2*( U1 - imag(U2) + i*( U1 +imag(U2) )); - pB = p + 2*k; - pA += 2; - - blockCnt = k >> 2; - while (blockCnt > 0) - { - /* - function X = my_split_rfft(X, ifftFlag) - % X is a series of real numbers - L = length(X); - XC = X(1:2:end) +i*X(2:2:end); - XA = fft(XC); - XB = conj(XA([1 end:-1:2])); - TW = i*exp(-2*pi*i*[0:L/2-1]/L).'; - for l = 2:L/2 - XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l))); - end - XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1)))); - X = XA; - */ - - - xA = vld2q_f32(pA); - pA += 8; - - xB = vld2q_f32(pB); - - xB.val[0] = vldrwq_gather_shifted_offset_f32(pB, vecStridesBkwd); - xB.val[1] = vldrwq_gather_shifted_offset_f32(&pB[1], vecStridesBkwd); - - xB.val[1] = vnegq_f32(xB.val[1]); - pB -= 8; - - - tw = vld2q_f32(pCoeff); - pCoeff += 8; - - - tmp1.val[0] = vaddq_f32(xA.val[0],xB.val[0]); - tmp1.val[1] = vaddq_f32(xA.val[1],xB.val[1]); - - tmp2.val[0] = vsubq_f32(xB.val[0],xA.val[0]); - tmp2.val[1] = vsubq_f32(xB.val[1],xA.val[1]); - - res.val[0] = vmulq(tw.val[0], tmp2.val[0]); - res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]); - - res.val[1] = vmulq(tw.val[0], tmp2.val[1]); - res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]); - - res.val[0] = vaddq_f32(res.val[0],tmp1.val[0] ); - res.val[1] = vaddq_f32(res.val[1],tmp1.val[1] ); - - res.val[0] = vmulq_n_f32(res.val[0], 0.5f); - res.val[1] = vmulq_n_f32(res.val[1], 0.5f); - - - vst2q_f32(pOut, res); - pOut += 8; - - - blockCnt--; - } - - blockCnt = k & 3; - while (blockCnt > 0) - { - /* - function X = my_split_rfft(X, ifftFlag) - % X is a series of real numbers - L = length(X); - XC = X(1:2:end) +i*X(2:2:end); - XA = fft(XC); - XB = conj(XA([1 end:-1:2])); - TW = i*exp(-2*pi*i*[0:L/2-1]/L).'; - for l = 2:L/2 - XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l))); - end - XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1)))); - X = XA; - */ - - xBI = pB[1]; - xBR = pB[0]; - xAR = pA[0]; - xAI = pA[1]; - - twR = *pCoeff++; - twI = *pCoeff++; - - t1a = xBR - xAR ; - t1b = xBI + xAI ; - - // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI); - // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI); - p0 = twR * t1a; - p1 = twI * t1a; - p2 = twR * t1b; - p3 = twI * t1b; - - *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR - *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI - - pA += 2; - pB -= 2; - blockCnt--; - } -} - -/* Prepares data for inverse cfft */ -void merge_rfft_f32( - const arm_rfft_fast_instance_f32 * S, - float32_t * p, - float32_t * pOut) -{ - int32_t k; /* Loop Counter */ - float32_t twR, twI; /* RFFT Twiddle coefficients */ - const float32_t *pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */ - float32_t *pA = p; /* increasing pointer */ - float32_t *pB = p; /* decreasing pointer */ - float32_t xAR, xAI, xBR, xBI; /* temporary variables */ - float32_t t1a, t1b, r, s, t, u; /* temporary variables */ - - float32x4x2_t tw,xA,xB; - float32x4x2_t tmp1, tmp2, res; - uint32x4_t vecStridesFwd, vecStridesBkwd; - - vecStridesFwd = vidupq_u32((uint32_t)0, 2); - vecStridesBkwd = -vecStridesFwd; - - int blockCnt; - - - k = (S->Sint).fftLen - 1; - - xAR = pA[0]; - xAI = pA[1]; - - pCoeff += 2 ; - - *pOut++ = 0.5f * ( xAR + xAI ); - *pOut++ = 0.5f * ( xAR - xAI ); - - pB = p + 2*k ; - pA += 2 ; - - blockCnt = k >> 2; - while (blockCnt > 0) - { - /* G is half of the frequency complex spectrum */ - //for k = 2:N - // Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2)))); - xA = vld2q_f32(pA); - pA += 8; - - xB = vld2q_f32(pB); - - xB.val[0] = vldrwq_gather_shifted_offset_f32(pB, vecStridesBkwd); - xB.val[1] = vldrwq_gather_shifted_offset_f32(&pB[1], vecStridesBkwd); - - xB.val[1] = vnegq_f32(xB.val[1]); - pB -= 8; - - - tw = vld2q_f32(pCoeff); - tw.val[1] = vnegq_f32(tw.val[1]); - pCoeff += 8; - - - tmp1.val[0] = vaddq_f32(xA.val[0],xB.val[0]); - tmp1.val[1] = vaddq_f32(xA.val[1],xB.val[1]); - - tmp2.val[0] = vsubq_f32(xB.val[0],xA.val[0]); - tmp2.val[1] = vsubq_f32(xB.val[1],xA.val[1]); - - res.val[0] = vmulq(tw.val[0], tmp2.val[0]); - res.val[0] = vfmsq(res.val[0],tw.val[1], tmp2.val[1]); - - res.val[1] = vmulq(tw.val[0], tmp2.val[1]); - res.val[1] = vfmaq(res.val[1], tw.val[1], tmp2.val[0]); - - res.val[0] = vaddq_f32(res.val[0],tmp1.val[0] ); - res.val[1] = vaddq_f32(res.val[1],tmp1.val[1] ); - - res.val[0] = vmulq_n_f32(res.val[0], 0.5f); - res.val[1] = vmulq_n_f32(res.val[1], 0.5f); - - - vst2q_f32(pOut, res); - pOut += 8; - - - blockCnt--; - } - - blockCnt = k & 3; - while (blockCnt > 0) - { - /* G is half of the frequency complex spectrum */ - //for k = 2:N - // Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2)))); - xBI = pB[1] ; - xBR = pB[0] ; - xAR = pA[0]; - xAI = pA[1]; - - twR = *pCoeff++; - twI = *pCoeff++; - - t1a = xAR - xBR ; - t1b = xAI + xBI ; - - r = twR * t1a; - s = twI * t1b; - t = twI * t1a; - u = twR * t1b; - - // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI); - // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI); - *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR - *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI - - pA += 2; - pB -= 2; - blockCnt--; - } - -} -#else -void stage_rfft_f32( - const arm_rfft_fast_instance_f32 * S, - float32_t * p, - float32_t * pOut) -{ - int32_t k; /* Loop Counter */ - float32_t twR, twI; /* RFFT Twiddle coefficients */ - const float32_t * pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */ - float32_t *pA = p; /* increasing pointer */ - float32_t *pB = p; /* decreasing pointer */ - float32_t xAR, xAI, xBR, xBI; /* temporary variables */ - float32_t t1a, t1b; /* temporary variables */ - float32_t p0, p1, p2, p3; /* temporary variables */ - - - k = (S->Sint).fftLen - 1; - - /* Pack first and last sample of the frequency domain together */ - - xBR = pB[0]; - xBI = pB[1]; - xAR = pA[0]; - xAI = pA[1]; - - twR = *pCoeff++ ; - twI = *pCoeff++ ; - - - // U1 = XA(1) + XB(1); % It is real - t1a = xBR + xAR ; - - // U2 = XB(1) - XA(1); % It is imaginary - t1b = xBI + xAI ; - - // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI); - // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI); - *pOut++ = 0.5f * ( t1a + t1b ); - *pOut++ = 0.5f * ( t1a - t1b ); - - // XA(1) = 1/2*( U1 - imag(U2) + i*( U1 +imag(U2) )); - pB = p + 2*k; - pA += 2; - - do - { - /* - function X = my_split_rfft(X, ifftFlag) - % X is a series of real numbers - L = length(X); - XC = X(1:2:end) +i*X(2:2:end); - XA = fft(XC); - XB = conj(XA([1 end:-1:2])); - TW = i*exp(-2*pi*i*[0:L/2-1]/L).'; - for l = 2:L/2 - XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l))); - end - XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1)))); - X = XA; - */ - - xBI = pB[1]; - xBR = pB[0]; - xAR = pA[0]; - xAI = pA[1]; - - twR = *pCoeff++; - twI = *pCoeff++; - - t1a = xBR - xAR ; - t1b = xBI + xAI ; - - // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI); - // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI); - p0 = twR * t1a; - p1 = twI * t1a; - p2 = twR * t1b; - p3 = twI * t1b; - - *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR - *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI - - - pA += 2; - pB -= 2; - k--; - } while (k > 0); -} - -/* Prepares data for inverse cfft */ -void merge_rfft_f32( - const arm_rfft_fast_instance_f32 * S, - float32_t * p, - float32_t * pOut) -{ - int32_t k; /* Loop Counter */ - float32_t twR, twI; /* RFFT Twiddle coefficients */ - const float32_t *pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */ - float32_t *pA = p; /* increasing pointer */ - float32_t *pB = p; /* decreasing pointer */ - float32_t xAR, xAI, xBR, xBI; /* temporary variables */ - float32_t t1a, t1b, r, s, t, u; /* temporary variables */ - - k = (S->Sint).fftLen - 1; - - xAR = pA[0]; - xAI = pA[1]; - - pCoeff += 2 ; - - *pOut++ = 0.5f * ( xAR + xAI ); - *pOut++ = 0.5f * ( xAR - xAI ); - - pB = p + 2*k ; - pA += 2 ; - - while (k > 0) - { - /* G is half of the frequency complex spectrum */ - //for k = 2:N - // Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2)))); - xBI = pB[1] ; - xBR = pB[0] ; - xAR = pA[0]; - xAI = pA[1]; - - twR = *pCoeff++; - twI = *pCoeff++; - - t1a = xAR - xBR ; - t1b = xAI + xBI ; - - r = twR * t1a; - s = twI * t1b; - t = twI * t1a; - u = twR * t1b; - - // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI); - // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI); - *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR - *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI - - pA += 2; - pB -= 2; - k--; - } - -} - -#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ - -/** - @ingroup groupTransforms -*/ - -/** - @defgroup RealFFT Real FFT Functions - - @par - The CMSIS DSP library includes specialized algorithms for computing the - FFT of real data sequences. The FFT is defined over complex data but - in many applications the input is real. Real FFT algorithms take advantage - of the symmetry properties of the FFT and have a speed advantage over complex - algorithms of the same length. - @par - The Fast RFFT algorithm relays on the mixed radix CFFT that save processor usage. - @par - The real length N forward FFT of a sequence is computed using the steps shown below. - @par - \image html RFFT.gif "Real Fast Fourier Transform" - @par - The real sequence is initially treated as if it were complex to perform a CFFT. - Later, a processing stage reshapes the data to obtain half of the frequency spectrum - in complex format. Except the first complex number that contains the two real numbers - X[0] and X[N/2] all the data is complex. In other words, the first complex sample - contains two real values packed. - @par - The input for the inverse RFFT should keep the same format as the output of the - forward RFFT. A first processing stage pre-process the data to later perform an - inverse CFFT. - @par - \image html RIFFT.gif "Real Inverse Fast Fourier Transform" - @par - The algorithms for floating-point, Q15, and Q31 data are slightly different - and we describe each algorithm in turn. - @par Floating-point - The main functions are \ref arm_rfft_fast_f32() and \ref arm_rfft_fast_init_f32(). - The older functions \ref arm_rfft_f32() and \ref arm_rfft_init_f32() have been deprecated - but are still documented. - @par - The FFT of a real N-point sequence has even symmetry in the frequency domain. - The second half of the data equals the conjugate of the first half flipped in frequency. - Looking at the data, we see that we can uniquely represent the FFT using only N/2 complex numbers. - These are packed into the output array in alternating real and imaginary components: - @par - X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ... - real[(N/2)-1], imag[(N/2)-1 } - @par - It happens that the first complex number (real[0], imag[0]) is actually - all real. real[0] represents the DC offset, and imag[0] should be 0. - (real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is - the first harmonic and so on. - @par - The real FFT functions pack the frequency domain data in this fashion. - The forward transform outputs the data in this form and the inverse - transform expects input data in this form. The function always performs - the needed bitreversal so that the input and output data is always in - normal order. The functions support lengths of [32, 64, 128, ..., 4096] - samples. - @par Q15 and Q31 - The real algorithms are defined in a similar manner and utilize N/2 complex - transforms behind the scenes. - @par - The complex transforms used internally include scaling to prevent fixed-point - overflows. The overall scaling equals 1/(fftLen/2). - Due to the use of complex transform internally, the source buffer is - modified by the rfft. - @par - A separate instance structure must be defined for each transform used but - twiddle factor and bit reversal tables can be reused. - @par - There is also an associated initialization function for each data type. - The initialization function performs the following operations: - - Sets the values of the internal structure fields. - - Initializes twiddle factor table and bit reversal table pointers. - - Initializes the internal complex FFT data structure. - @par - Use of the initialization function is optional **except for MVE versions where it is mandatory**. - If you don't use the initialization functions, then the structures should be initialized with code - similar to the one below: - <pre> - arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft}; - arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft}; - </pre> - where <code>fftLenReal</code> is the length of the real transform; - <code>fftLenBy2</code> length of the internal complex transform (fftLenReal/2). - <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform. - <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order - output (=1). - <code>twidCoefRModifier</code> stride modifier for the twiddle factor table. - The value is based on the FFT length; - <code>pTwiddleAReal</code>points to the A array of twiddle coefficients; - <code>pTwiddleBReal</code>points to the B array of twiddle coefficients; - <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure - must also be initialized. -@par - Note that with MVE versions you can't initialize instance structures directly and **must - use the initialization function**. - */ - -/** - @addtogroup RealFFT - @{ -*/ - -/** - @brief Processing function for the floating-point real FFT. - @param[in] S points to an arm_rfft_fast_instance_f32 structure - @param[in] p points to input buffer (Source buffer is modified by this function.) - @param[in] pOut points to output buffer - @param[in] ifftFlag - - value = 0: RFFT - - value = 1: RIFFT - @return none -*/ - -void arm_rfft_fast_f32( - const arm_rfft_fast_instance_f32 * S, - float32_t * p, - float32_t * pOut, - uint8_t ifftFlag) -{ - const arm_cfft_instance_f32 * Sint = &(S->Sint); - - /* Calculation of Real FFT */ - if (ifftFlag) - { - /* Real FFT compression */ - merge_rfft_f32(S, p, pOut); - /* Complex radix-4 IFFT process */ - arm_cfft_f32( Sint, pOut, ifftFlag, 1); - } - else - { - /* Calculation of RFFT of input */ - arm_cfft_f32( Sint, p, ifftFlag, 1); - - /* Real FFT extraction */ - stage_rfft_f32(S, p, pOut); - } -} - -/** -* @} end of RealFFT group -*/ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rfft_f32.c
+ * Description: RFFT & RIFFT Floating point process function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+void stage_rfft_f32(
+ const arm_rfft_fast_instance_f32 * S,
+ float32_t * p,
+ float32_t * pOut)
+{
+ uint32_t k; /* Loop Counter */
+ float32_t twR, twI; /* RFFT Twiddle coefficients */
+ const float32_t * pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */
+ float32_t *pA = p; /* increasing pointer */
+ float32_t *pB = p; /* decreasing pointer */
+ float32_t xAR, xAI, xBR, xBI; /* temporary variables */
+ float32_t t1a, t1b; /* temporary variables */
+ float32_t p0, p1, p2, p3; /* temporary variables */
+
+
+ k = (S->Sint).fftLen - 1;
+
+ /* Pack first and last sample of the frequency domain together */
+
+ xBR = pB[0];
+ xBI = pB[1];
+ xAR = pA[0];
+ xAI = pA[1];
+
+ twR = *pCoeff++ ;
+ twI = *pCoeff++ ;
+
+ // U1 = XA(1) + XB(1); % It is real
+ t1a = xBR + xAR ;
+
+ // U2 = XB(1) - XA(1); % It is imaginary
+ t1b = xBI + xAI ;
+
+ // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
+ // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
+ *pOut++ = 0.5f * ( t1a + t1b );
+ *pOut++ = 0.5f * ( t1a - t1b );
+
+ // XA(1) = 1/2*( U1 - imag(U2) + i*( U1 +imag(U2) ));
+ pB = p + 2*k;
+ pA += 2;
+
+ do
+ {
+ /*
+ function X = my_split_rfft(X, ifftFlag)
+ % X is a series of real numbers
+ L = length(X);
+ XC = X(1:2:end) +i*X(2:2:end);
+ XA = fft(XC);
+ XB = conj(XA([1 end:-1:2]));
+ TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
+ for l = 2:L/2
+ XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
+ end
+ XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
+ X = XA;
+ */
+
+ xBI = pB[1];
+ xBR = pB[0];
+ xAR = pA[0];
+ xAI = pA[1];
+
+ twR = *pCoeff++;
+ twI = *pCoeff++;
+
+ t1a = xBR - xAR ;
+ t1b = xBI + xAI ;
+
+ // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
+ // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
+ p0 = twR * t1a;
+ p1 = twI * t1a;
+ p2 = twR * t1b;
+ p3 = twI * t1b;
+
+ *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
+ *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
+
+ pA += 2;
+ pB -= 2;
+ k--;
+ } while (k > 0U);
+}
+
+/* Prepares data for inverse cfft */
+void merge_rfft_f32(
+ const arm_rfft_fast_instance_f32 * S,
+ float32_t * p,
+ float32_t * pOut)
+{
+ uint32_t k; /* Loop Counter */
+ float32_t twR, twI; /* RFFT Twiddle coefficients */
+ const float32_t *pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */
+ float32_t *pA = p; /* increasing pointer */
+ float32_t *pB = p; /* decreasing pointer */
+ float32_t xAR, xAI, xBR, xBI; /* temporary variables */
+ float32_t t1a, t1b, r, s, t, u; /* temporary variables */
+
+ k = (S->Sint).fftLen - 1;
+
+ xAR = pA[0];
+ xAI = pA[1];
+
+ pCoeff += 2 ;
+
+ *pOut++ = 0.5f * ( xAR + xAI );
+ *pOut++ = 0.5f * ( xAR - xAI );
+
+ pB = p + 2*k ;
+ pA += 2 ;
+
+ while (k > 0U)
+ {
+ /* G is half of the frequency complex spectrum */
+ //for k = 2:N
+ // Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
+ xBI = pB[1] ;
+ xBR = pB[0] ;
+ xAR = pA[0];
+ xAI = pA[1];
+
+ twR = *pCoeff++;
+ twI = *pCoeff++;
+
+ t1a = xAR - xBR ;
+ t1b = xAI + xBI ;
+
+ r = twR * t1a;
+ s = twI * t1b;
+ t = twI * t1a;
+ u = twR * t1b;
+
+ // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
+ // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
+ *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
+ *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
+
+ pA += 2;
+ pB -= 2;
+ k--;
+ }
+
+}
+
+/**
+ @ingroup groupTransforms
+*/
+
+/**
+ @defgroup RealFFT Real FFT Functions
+
+ @par
+ The CMSIS DSP library includes specialized algorithms for computing the
+ FFT of real data sequences. The FFT is defined over complex data but
+ in many applications the input is real. Real FFT algorithms take advantage
+ of the symmetry properties of the FFT and have a speed advantage over complex
+ algorithms of the same length.
+ @par
+ The Fast RFFT algorith relays on the mixed radix CFFT that save processor usage.
+ @par
+ The real length N forward FFT of a sequence is computed using the steps shown below.
+ @par
+ \image html RFFT.gif "Real Fast Fourier Transform"
+ @par
+ The real sequence is initially treated as if it were complex to perform a CFFT.
+ Later, a processing stage reshapes the data to obtain half of the frequency spectrum
+ in complex format. Except the first complex number that contains the two real numbers
+ X[0] and X[N/2] all the data is complex. In other words, the first complex sample
+ contains two real values packed.
+ @par
+ The input for the inverse RFFT should keep the same format as the output of the
+ forward RFFT. A first processing stage pre-process the data to later perform an
+ inverse CFFT.
+ @par
+ \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
+ @par
+ The algorithms for floating-point, Q15, and Q31 data are slightly different
+ and we describe each algorithm in turn.
+ @par Floating-point
+ The main functions are \ref arm_rfft_fast_f32() and \ref arm_rfft_fast_init_f32().
+ The older functions \ref arm_rfft_f32() and \ref arm_rfft_init_f32() have been deprecated
+ but are still documented.
+ @par
+ The FFT of a real N-point sequence has even symmetry in the frequency domain.
+ The second half of the data equals the conjugate of the first half flipped in frequency.
+ Looking at the data, we see that we can uniquely represent the FFT using only N/2 complex numbers.
+ These are packed into the output array in alternating real and imaginary components:
+ @par
+ X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ...
+ real[(N/2)-1], imag[(N/2)-1 }
+ @par
+ It happens that the first complex number (real[0], imag[0]) is actually
+ all real. real[0] represents the DC offset, and imag[0] should be 0.
+ (real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is
+ the first harmonic and so on.
+ @par
+ The real FFT functions pack the frequency domain data in this fashion.
+ The forward transform outputs the data in this form and the inverse
+ transform expects input data in this form. The function always performs
+ the needed bitreversal so that the input and output data is always in
+ normal order. The functions support lengths of [32, 64, 128, ..., 4096]
+ samples.
+ @par Q15 and Q31
+ The real algorithms are defined in a similar manner and utilize N/2 complex
+ transforms behind the scenes.
+ @par
+ The complex transforms used internally include scaling to prevent fixed-point
+ overflows. The overall scaling equals 1/(fftLen/2).
+ @par
+ A separate instance structure must be defined for each transform used but
+ twiddle factor and bit reversal tables can be reused.
+ @par
+ There is also an associated initialization function for each data type.
+ The initialization function performs the following operations:
+ - Sets the values of the internal structure fields.
+ - Initializes twiddle factor table and bit reversal table pointers.
+ - Initializes the internal complex FFT data structure.
+ @par
+ Use of the initialization function is optional.
+ However, if the initialization function is used, then the instance structure
+ cannot be placed into a const data section. To place an instance structure
+ into a const data section, the instance structure should be manually
+ initialized as follows:
+ <pre>
+ arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
+ arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
+ </pre>
+ where <code>fftLenReal</code> is the length of the real transform;
+ <code>fftLenBy2</code> length of the internal complex transform.
+ <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
+ <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
+ output (=1).
+ <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
+ The value is based on the FFT length;
+ <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
+ <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
+ <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
+ must also be initialized. Refer to arm_cfft_radix4_f32() for details regarding
+ static initialization of the complex FFT instance structure.
+ */
+
+/**
+ @addtogroup RealFFT
+ @{
+*/
+
+/**
+ @brief Processing function for the floating-point real FFT.
+ @param[in] S points to an arm_rfft_fast_instance_f32 structure
+ @param[in] p points to input buffer
+ @param[in] pOut points to output buffer
+ @param[in] ifftFlag
+ - value = 0: RFFT
+ - value = 1: RIFFT
+ @return none
+*/
+
+void arm_rfft_fast_f32(
+ arm_rfft_fast_instance_f32 * S,
+ float32_t * p,
+ float32_t * pOut,
+ uint8_t ifftFlag)
+{
+ arm_cfft_instance_f32 * Sint = &(S->Sint);
+ Sint->fftLen = S->fftLenRFFT / 2;
+
+ /* Calculation of Real FFT */
+ if (ifftFlag)
+ {
+ /* Real FFT compression */
+ merge_rfft_f32(S, p, pOut);
+
+ /* Complex radix-4 IFFT process */
+ arm_cfft_f32( Sint, pOut, ifftFlag, 1);
+ }
+ else
+ {
+ /* Calculation of RFFT of input */
+ arm_cfft_f32( Sint, p, ifftFlag, 1);
+
+ /* Real FFT extraction */
+ stage_rfft_f32(S, p, pOut);
+ }
+}
+
+/**
+* @} end of RealFFT group
+*/
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f32.c index e8273b4..ca510cd 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_fast_init_f32.c @@ -1,352 +1,344 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rfft_fast_init_f32.c - * Description: Split Radix Decimation in Frequency CFFT Floating point processing function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - -/** - @ingroup groupTransforms - */ - -/** - @addtogroup RealFFT - @{ - */ - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32)) - -/** - @private - @brief Initialization function for the 32pt floating-point real FFT. - @param[in,out] S points to an arm_rfft_fast_instance_f32 structure - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected - */ - -static arm_status arm_rfft_32_fast_init_f32( arm_rfft_fast_instance_f32 * S ) { - - arm_status status; - - if( !S ) return ARM_MATH_ARGUMENT_ERROR; - - status=arm_cfft_init_f32(&(S->Sint),16); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - - S->fftLenRFFT = 32U; - S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_32; - - return ARM_MATH_SUCCESS; -} -#endif - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64)) - -/** - @private - @brief Initialization function for the 64pt floating-point real FFT. - @param[in,out] S points to an arm_rfft_fast_instance_f32 structure - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected - */ - -static arm_status arm_rfft_64_fast_init_f32( arm_rfft_fast_instance_f32 * S ) { - - arm_status status; - - if( !S ) return ARM_MATH_ARGUMENT_ERROR; - - status=arm_cfft_init_f32(&(S->Sint),32); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - S->fftLenRFFT = 64U; - - S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_64; - - return ARM_MATH_SUCCESS; -} -#endif - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128)) - -/** - @private - @brief Initialization function for the 128pt floating-point real FFT. - @param[in,out] S points to an arm_rfft_fast_instance_f32 structure - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected - */ - -static arm_status arm_rfft_128_fast_init_f32( arm_rfft_fast_instance_f32 * S ) { - - arm_status status; - - if( !S ) return ARM_MATH_ARGUMENT_ERROR; - - status=arm_cfft_init_f32(&(S->Sint),64); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - S->fftLenRFFT = 128; - - S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_128; - - return ARM_MATH_SUCCESS; -} -#endif - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256)) - -/** - @private - @brief Initialization function for the 256pt floating-point real FFT. - @param[in,out] S points to an arm_rfft_fast_instance_f32 structure - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected -*/ - -static arm_status arm_rfft_256_fast_init_f32( arm_rfft_fast_instance_f32 * S ) { - - arm_status status; - - if( !S ) return ARM_MATH_ARGUMENT_ERROR; - - status=arm_cfft_init_f32(&(S->Sint),128); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - S->fftLenRFFT = 256U; - - S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_256; - - return ARM_MATH_SUCCESS; -} -#endif - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512)) - -/** - @private - @brief Initialization function for the 512pt floating-point real FFT. - @param[in,out] S points to an arm_rfft_fast_instance_f32 structure - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected - */ - -static arm_status arm_rfft_512_fast_init_f32( arm_rfft_fast_instance_f32 * S ) { - - arm_status status; - - if( !S ) return ARM_MATH_ARGUMENT_ERROR; - - status=arm_cfft_init_f32(&(S->Sint),256); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - S->fftLenRFFT = 512U; - - S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_512; - - return ARM_MATH_SUCCESS; -} -#endif - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024)) -/** - @private - @brief Initialization function for the 1024pt floating-point real FFT. - @param[in,out] S points to an arm_rfft_fast_instance_f32 structure - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected - */ - -static arm_status arm_rfft_1024_fast_init_f32( arm_rfft_fast_instance_f32 * S ) { - - arm_status status; - - if( !S ) return ARM_MATH_ARGUMENT_ERROR; - - status=arm_cfft_init_f32(&(S->Sint),512); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - S->fftLenRFFT = 1024U; - - S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_1024; - - return ARM_MATH_SUCCESS; -} -#endif - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048)) -/** - @private - @brief Initialization function for the 2048pt floating-point real FFT. - @param[in,out] S points to an arm_rfft_fast_instance_f32 structure - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected - */ -static arm_status arm_rfft_2048_fast_init_f32( arm_rfft_fast_instance_f32 * S ) { - - arm_status status; - - if( !S ) return ARM_MATH_ARGUMENT_ERROR; - - status=arm_cfft_init_f32(&(S->Sint),1024); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - S->fftLenRFFT = 2048U; - - S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_2048; - - return ARM_MATH_SUCCESS; -} -#endif - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096)) -/** - @private -* @brief Initialization function for the 4096pt floating-point real FFT. -* @param[in,out] S points to an arm_rfft_fast_instance_f32 structure - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected - */ - -static arm_status arm_rfft_4096_fast_init_f32( arm_rfft_fast_instance_f32 * S ) { - - arm_status status; - - if( !S ) return ARM_MATH_ARGUMENT_ERROR; - - status=arm_cfft_init_f32(&(S->Sint),2048); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - S->fftLenRFFT = 4096U; - - S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_4096; - - return ARM_MATH_SUCCESS; -} -#endif - -/** - @brief Initialization function for the floating-point real FFT. - @param[in,out] S points to an arm_rfft_fast_instance_f32 structure - @param[in] fftLen length of the Real Sequence - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length - - @par Description - The parameter <code>fftLen</code> specifies the length of RFFT/CIFFT process. - Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096. - @par - This Function also initializes Twiddle factor table pointer and Bit reversal table pointer. - */ - -arm_status arm_rfft_fast_init_f32( - arm_rfft_fast_instance_f32 * S, - uint16_t fftLen) -{ - typedef arm_status(*fft_init_ptr)( arm_rfft_fast_instance_f32 *); - fft_init_ptr fptr = 0x0; - - switch (fftLen) - { -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096)) - case 4096U: - fptr = arm_rfft_4096_fast_init_f32; - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048)) - case 2048U: - fptr = arm_rfft_2048_fast_init_f32; - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024)) - case 1024U: - fptr = arm_rfft_1024_fast_init_f32; - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512)) - case 512U: - fptr = arm_rfft_512_fast_init_f32; - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256)) - case 256U: - fptr = arm_rfft_256_fast_init_f32; - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128)) - case 128U: - fptr = arm_rfft_128_fast_init_f32; - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64)) - case 64U: - fptr = arm_rfft_64_fast_init_f32; - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32)) - case 32U: - fptr = arm_rfft_32_fast_init_f32; - break; -#endif - default: - break; - } - - if( ! fptr ) return ARM_MATH_ARGUMENT_ERROR; - return fptr( S ); - -} - -/** - @} end of RealFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cfft_init_f32.c
+ * Description: Split Radix Decimation in Frequency CFFT Floating point processing function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+ @ingroup groupTransforms
+ */
+
+/**
+ @addtogroup RealFFT
+ @{
+ */
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16) && defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32))
+
+/**
+ @brief Initialization function for the 32pt floating-point real FFT.
+ @param[in,out] S points to an arm_rfft_fast_instance_f32 structure
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
+ */
+
+arm_status arm_rfft_32_fast_init_f32( arm_rfft_fast_instance_f32 * S ) {
+
+ arm_cfft_instance_f32 * Sint;
+
+ if( !S ) return ARM_MATH_ARGUMENT_ERROR;
+
+ Sint = &(S->Sint);
+ Sint->fftLen = 16U;
+ S->fftLenRFFT = 32U;
+
+ Sint->bitRevLength = ARMBITREVINDEXTABLE_16_TABLE_LENGTH;
+ Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable16;
+ Sint->pTwiddle = (float32_t *) twiddleCoef_16;
+ S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_32;
+
+ return ARM_MATH_SUCCESS;
+}
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64))
+
+/**
+ @brief Initialization function for the 64pt floating-point real FFT.
+ @param[in,out] S points to an arm_rfft_fast_instance_f32 structure
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
+ */
+
+arm_status arm_rfft_64_fast_init_f32( arm_rfft_fast_instance_f32 * S ) {
+
+ arm_cfft_instance_f32 * Sint;
+
+ if( !S ) return ARM_MATH_ARGUMENT_ERROR;
+
+ Sint = &(S->Sint);
+ Sint->fftLen = 32U;
+ S->fftLenRFFT = 64U;
+
+ Sint->bitRevLength = ARMBITREVINDEXTABLE_32_TABLE_LENGTH;
+ Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable32;
+ Sint->pTwiddle = (float32_t *) twiddleCoef_32;
+ S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_64;
+
+ return ARM_MATH_SUCCESS;
+}
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128))
+
+/**
+ @brief Initialization function for the 128pt floating-point real FFT.
+ @param[in,out] S points to an arm_rfft_fast_instance_f32 structure
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
+ */
+
+arm_status arm_rfft_128_fast_init_f32( arm_rfft_fast_instance_f32 * S ) {
+
+ arm_cfft_instance_f32 * Sint;
+
+ if( !S ) return ARM_MATH_ARGUMENT_ERROR;
+
+ Sint = &(S->Sint);
+ Sint->fftLen = 64U;
+ S->fftLenRFFT = 128U;
+
+ Sint->bitRevLength = ARMBITREVINDEXTABLE_64_TABLE_LENGTH;
+ Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable64;
+ Sint->pTwiddle = (float32_t *) twiddleCoef_64;
+ S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_128;
+
+ return ARM_MATH_SUCCESS;
+}
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256))
+
+/**
+ @brief Initialization function for the 256pt floating-point real FFT.
+ @param[in,out] S points to an arm_rfft_fast_instance_f32 structure
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
+*/
+
+arm_status arm_rfft_256_fast_init_f32( arm_rfft_fast_instance_f32 * S ) {
+
+ arm_cfft_instance_f32 * Sint;
+
+ if( !S ) return ARM_MATH_ARGUMENT_ERROR;
+
+ Sint = &(S->Sint);
+ Sint->fftLen = 128U;
+ S->fftLenRFFT = 256U;
+
+ Sint->bitRevLength = ARMBITREVINDEXTABLE_128_TABLE_LENGTH;
+ Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable128;
+ Sint->pTwiddle = (float32_t *) twiddleCoef_128;
+ S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_256;
+
+ return ARM_MATH_SUCCESS;
+}
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512))
+
+/**
+ @brief Initialization function for the 512pt floating-point real FFT.
+ @param[in,out] S points to an arm_rfft_fast_instance_f32 structure
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
+ */
+
+arm_status arm_rfft_512_fast_init_f32( arm_rfft_fast_instance_f32 * S ) {
+
+ arm_cfft_instance_f32 * Sint;
+
+ if( !S ) return ARM_MATH_ARGUMENT_ERROR;
+
+ Sint = &(S->Sint);
+ Sint->fftLen = 256U;
+ S->fftLenRFFT = 512U;
+
+ Sint->bitRevLength = ARMBITREVINDEXTABLE_256_TABLE_LENGTH;
+ Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable256;
+ Sint->pTwiddle = (float32_t *) twiddleCoef_256;
+ S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_512;
+
+ return ARM_MATH_SUCCESS;
+}
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024))
+/**
+ @brief Initialization function for the 1024pt floating-point real FFT.
+ @param[in,out] S points to an arm_rfft_fast_instance_f32 structure
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
+ */
+
+arm_status arm_rfft_1024_fast_init_f32( arm_rfft_fast_instance_f32 * S ) {
+
+ arm_cfft_instance_f32 * Sint;
+
+ if( !S ) return ARM_MATH_ARGUMENT_ERROR;
+
+ Sint = &(S->Sint);
+ Sint->fftLen = 512U;
+ S->fftLenRFFT = 1024U;
+
+ Sint->bitRevLength = ARMBITREVINDEXTABLE_512_TABLE_LENGTH;
+ Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable512;
+ Sint->pTwiddle = (float32_t *) twiddleCoef_512;
+ S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_1024;
+
+ return ARM_MATH_SUCCESS;
+}
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048))
+/**
+ @brief Initialization function for the 2048pt floating-point real FFT.
+ @param[in,out] S points to an arm_rfft_fast_instance_f32 structure
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
+ */
+arm_status arm_rfft_2048_fast_init_f32( arm_rfft_fast_instance_f32 * S ) {
+
+ arm_cfft_instance_f32 * Sint;
+
+ if( !S ) return ARM_MATH_ARGUMENT_ERROR;
+
+ Sint = &(S->Sint);
+ Sint->fftLen = 1024U;
+ S->fftLenRFFT = 2048U;
+
+ Sint->bitRevLength = ARMBITREVINDEXTABLE_1024_TABLE_LENGTH;
+ Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable1024;
+ Sint->pTwiddle = (float32_t *) twiddleCoef_1024;
+ S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_2048;
+
+ return ARM_MATH_SUCCESS;
+}
+#endif
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096))
+/**
+* @brief Initialization function for the 4096pt floating-point real FFT.
+* @param[in,out] S points to an arm_rfft_fast_instance_f32 structure
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
+ */
+
+arm_status arm_rfft_4096_fast_init_f32( arm_rfft_fast_instance_f32 * S ) {
+
+ arm_cfft_instance_f32 * Sint;
+
+ if( !S ) return ARM_MATH_ARGUMENT_ERROR;
+
+ Sint = &(S->Sint);
+ Sint->fftLen = 2048U;
+ S->fftLenRFFT = 4096U;
+
+ Sint->bitRevLength = ARMBITREVINDEXTABLE_2048_TABLE_LENGTH;
+ Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable2048;
+ Sint->pTwiddle = (float32_t *) twiddleCoef_2048;
+ S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_4096;
+
+ return ARM_MATH_SUCCESS;
+}
+#endif
+
+/**
+ @brief Initialization function for the floating-point real FFT.
+ @param[in,out] S points to an arm_rfft_fast_instance_f32 structure
+ @param[in] fftLen length of the Real Sequence
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+ @par Description
+ The parameter <code>fftLen</code> specifies the length of RFFT/CIFFT process.
+ Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096.
+ @par
+ This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+ */
+
+arm_status arm_rfft_fast_init_f32(
+ arm_rfft_fast_instance_f32 * S,
+ uint16_t fftLen)
+{
+ typedef arm_status(*fft_init_ptr)( arm_rfft_fast_instance_f32 *);
+ fft_init_ptr fptr = 0x0;
+
+ switch (fftLen)
+ {
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096))
+ case 4096U:
+ fptr = arm_rfft_4096_fast_init_f32;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048))
+ case 2048U:
+ fptr = arm_rfft_2048_fast_init_f32;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024))
+ case 1024U:
+ fptr = arm_rfft_1024_fast_init_f32;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512))
+ case 512U:
+ fptr = arm_rfft_512_fast_init_f32;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256))
+ case 256U:
+ fptr = arm_rfft_256_fast_init_f32;
+ break;
+#endif
+#if (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128))
+ case 128U:
+ fptr = arm_rfft_128_fast_init_f32;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64))
+ case 64U:
+ fptr = arm_rfft_64_fast_init_f32;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16) && defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32))
+ case 32U:
+ fptr = arm_rfft_32_fast_init_f32;
+ break;
+#endif
+ default:
+ return ARM_MATH_ARGUMENT_ERROR;
+ }
+
+ if( ! fptr ) return ARM_MATH_ARGUMENT_ERROR;
+ return fptr( S );
+
+}
+
+/**
+ @} end of RealFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_f32.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_f32.c index 0a32da6..3d57a21 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_f32.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_f32.c @@ -1,147 +1,139 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rfft_init_f32.c - * Description: RFFT & RIFFT Floating point initialisation function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" - - -/** - @addtogroup RealFFT - @{ - */ - -/** - @brief Initialization function for the floating-point RFFT/RIFFT. - @deprecated Do not use this function. It has been superceded by \ref arm_rfft_fast_init_f32 and will be removed in the future. - @param[in,out] S points to an instance of the floating-point RFFT/RIFFT structure - @param[in,out] S_CFFT points to an instance of the floating-point CFFT/CIFFT structure - @param[in] fftLenReal length of the FFT. - @param[in] ifftFlagR flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLenReal</code> is not a supported length - - @par Description - The parameter <code>fftLenReal</code>specifies length of RFFT/RIFFT Process. - Supported FFT Lengths are 128, 512, 2048. - @par - The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed. - Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated. - @par - The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order. - Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order. - @par - This function also initializes Twiddle factor table. - */ - -arm_status arm_rfft_init_f32( - arm_rfft_instance_f32 * S, - arm_cfft_radix4_instance_f32 * S_CFFT, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag) -{ - /* Initialise the default arm status */ - arm_status status = ARM_MATH_ARGUMENT_ERROR; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_F32) - - /* Initialise the default arm status */ - status = ARM_MATH_SUCCESS; - - /* Initialize the Real FFT length */ - S->fftLenReal = (uint16_t) fftLenReal; - - /* Initialize the Complex FFT length */ - S->fftLenBy2 = (uint16_t) fftLenReal / 2U; - - /* Initialize the Twiddle coefficientA pointer */ - S->pTwiddleAReal = (float32_t *) realCoefA; - - /* Initialize the Twiddle coefficientB pointer */ - S->pTwiddleBReal = (float32_t *) realCoefB; - - /* Initialize the Flag for selection of RFFT or RIFFT */ - S->ifftFlagR = (uint8_t) ifftFlagR; - - /* Initialize the Flag for calculation Bit reversal or not */ - S->bitReverseFlagR = (uint8_t) bitReverseFlag; - - /* Initializations of structure parameters depending on the FFT length */ - switch (S->fftLenReal) - { - /* Init table modifier value */ - case 8192U: - S->twidCoefRModifier = 1U; - break; - case 2048U: - S->twidCoefRModifier = 4U; - break; - case 512U: - S->twidCoefRModifier = 16U; - break; - case 128U: - S->twidCoefRModifier = 64U; - break; - default: - /* Reporting argument error if rfftSize is not valid value */ - status = ARM_MATH_ARGUMENT_ERROR; - break; - } - - /* Init Complex FFT Instance */ - S->pCfft = S_CFFT; - - if (S->ifftFlagR) - { - /* Initializes the CIFFT Module for fftLenreal/2 length */ - arm_cfft_radix4_init_f32(S->pCfft, S->fftLenBy2, 1U, 0U); - } - else - { - /* Initializes the CFFT Module for fftLenreal/2 length */ - arm_cfft_radix4_init_f32(S->pCfft, S->fftLenBy2, 0U, 0U); - } - -#endif -#endif - /* return the status of RFFT Init function */ - return (status); - -} - -/** - @} end of RealFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rfft_init_f32.c
+ * Description: RFFT & RIFFT Floating point initialisation function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+
+/**
+ @addtogroup RealFFT
+ @{
+ */
+
+/**
+ @brief Initialization function for the floating-point RFFT/RIFFT.
+ @deprecated Do not use this function. It has been superceded by \ref arm_rfft_fast_init_f32 and will be removed in the future.
+ @param[in,out] S points to an instance of the floating-point RFFT/RIFFT structure
+ @param[in,out] S_CFFT points to an instance of the floating-point CFFT/CIFFT structure
+ @param[in] fftLenReal length of the FFT.
+ @param[in] ifftFlagR flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLenReal</code> is not a supported length
+
+ @par Description
+ The parameter <code>fftLenReal</code>specifies length of RFFT/RIFFT Process.
+ Supported FFT Lengths are 128, 512, 2048.
+ @par
+ The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed.
+ Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated.
+ @par
+ The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+ Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+ @par
+ This function also initializes Twiddle factor table.
+ */
+
+arm_status arm_rfft_init_f32(
+ arm_rfft_instance_f32 * S,
+ arm_cfft_radix4_instance_f32 * S_CFFT,
+ uint32_t fftLenReal,
+ uint32_t ifftFlagR,
+ uint32_t bitReverseFlag)
+{
+
+ /* Initialise the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+
+ /* Initialize the Real FFT length */
+ S->fftLenReal = (uint16_t) fftLenReal;
+
+ /* Initialize the Complex FFT length */
+ S->fftLenBy2 = (uint16_t) fftLenReal / 2U;
+
+ /* Initialize the Twiddle coefficientA pointer */
+ S->pTwiddleAReal = (float32_t *) realCoefA;
+
+ /* Initialize the Twiddle coefficientB pointer */
+ S->pTwiddleBReal = (float32_t *) realCoefB;
+
+ /* Initialize the Flag for selection of RFFT or RIFFT */
+ S->ifftFlagR = (uint8_t) ifftFlagR;
+
+ /* Initialize the Flag for calculation Bit reversal or not */
+ S->bitReverseFlagR = (uint8_t) bitReverseFlag;
+
+ /* Initializations of structure parameters depending on the FFT length */
+ switch (S->fftLenReal)
+ {
+ /* Init table modifier value */
+ case 8192U:
+ S->twidCoefRModifier = 1U;
+ break;
+ case 2048U:
+ S->twidCoefRModifier = 4U;
+ break;
+ case 512U:
+ S->twidCoefRModifier = 16U;
+ break;
+ case 128U:
+ S->twidCoefRModifier = 64U;
+ break;
+ default:
+ /* Reporting argument error if rfftSize is not valid value */
+ status = ARM_MATH_ARGUMENT_ERROR;
+ break;
+ }
+
+ /* Init Complex FFT Instance */
+ S->pCfft = S_CFFT;
+
+ if (S->ifftFlagR)
+ {
+ /* Initializes the CIFFT Module for fftLenreal/2 length */
+ arm_cfft_radix4_init_f32(S->pCfft, S->fftLenBy2, 1U, 0U);
+ }
+ else
+ {
+ /* Initializes the CFFT Module for fftLenreal/2 length */
+ arm_cfft_radix4_init_f32(S->pCfft, S->fftLenBy2, 0U, 0U);
+ }
+
+ /* return the status of RFFT Init function */
+ return (status);
+
+}
+
+/**
+ @} end of RealFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q15.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q15.c index e70f8af..fb4c66c 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q15.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q15.c @@ -1,248 +1,158 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rfft_init_q15.c - * Description: RFFT & RIFFT Q15 initialisation function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" -#include "arm_const_structs.h" - -/** - @addtogroup RealFFT - @{ - */ - -/** - @brief Initialization function for the Q15 RFFT/RIFFT. - @param[in,out] S points to an instance of the Q15 RFFT/RIFFT structure - @param[in] fftLenReal length of the FFT - @param[in] ifftFlagR flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLenReal</code> is not a supported length - - @par Details - The parameter <code>fftLenReal</code> specifies length of RFFT/RIFFT Process. - Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192. - @par - The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed. - Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated. - @par - The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order. - Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order. - @par - This function also initializes Twiddle factor table. - */ - -arm_status arm_rfft_init_q15( - arm_rfft_instance_q15 * S, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag) -{ - /* Initialise the default arm status */ - arm_status status = ARM_MATH_ARGUMENT_ERROR; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_Q15) - - /* Initialise the default arm status */ - status = ARM_MATH_SUCCESS; - - /* Initialize the Real FFT length */ - S->fftLenReal = (uint16_t) fftLenReal; - - /* Initialize the Twiddle coefficientA pointer */ - S->pTwiddleAReal = (q15_t *) realCoefAQ15; - - /* Initialize the Twiddle coefficientB pointer */ - S->pTwiddleBReal = (q15_t *) realCoefBQ15; - - /* Initialize the Flag for selection of RFFT or RIFFT */ - S->ifftFlagR = (uint8_t) ifftFlagR; - - /* Initialize the Flag for calculation Bit reversal or not */ - S->bitReverseFlagR = (uint8_t) bitReverseFlag; - - /* Initialization of coef modifier depending on the FFT length */ - switch (S->fftLenReal) - { -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096)) - case 8192U: - S->twidCoefRModifier = 1U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q15(&(S->cfftInst),4096); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q15_len4096; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048)) - case 4096U: - S->twidCoefRModifier = 2U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q15(&(S->cfftInst),2048); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q15_len2048; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024)) - case 2048U: - S->twidCoefRModifier = 4U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q15(&(S->cfftInst),1024); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q15_len1024; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512)) - case 1024U: - S->twidCoefRModifier = 8U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q15(&(S->cfftInst),512); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q15_len512; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256)) - case 512U: - S->twidCoefRModifier = 16U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q15(&(S->cfftInst),256); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q15_len256; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128)) - case 256U: - S->twidCoefRModifier = 32U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q15(&(S->cfftInst),128); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q15_len128; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64)) - case 128U: - S->twidCoefRModifier = 64U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q15(&(S->cfftInst),64); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q15_len64; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32)) - case 64U: - S->twidCoefRModifier = 128U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q15(&(S->cfftInst),32); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q15_len32; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16)) - case 32U: - S->twidCoefRModifier = 256U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q15(&(S->cfftInst),16); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q15_len16; - #endif - break; -#endif - default: - /* Reporting argument error if rfftSize is not valid value */ - status = ARM_MATH_ARGUMENT_ERROR; - break; - } - -#endif -#endif - /* return the status of RFFT Init function */ - return (status); -} - -/** - @} end of RealFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rfft_init_q15.c
+ * Description: RFFT & RIFFT Q15 initialisation function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+#include "arm_const_structs.h"
+
+/**
+ @addtogroup RealFFT
+ @{
+ */
+
+/**
+ @brief Initialization function for the Q15 RFFT/RIFFT.
+ @param[in,out] S points to an instance of the Q15 RFFT/RIFFT structure
+ @param[in] fftLenReal length of the FFT
+ @param[in] ifftFlagR flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLenReal</code> is not a supported length
+
+ @par Details
+ The parameter <code>fftLenReal</code> specifies length of RFFT/RIFFT Process.
+ Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192.
+ @par
+ The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed.
+ Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated.
+ @par
+ The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+ Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+ @par
+ This function also initializes Twiddle factor table.
+ */
+
+arm_status arm_rfft_init_q15(
+ arm_rfft_instance_q15 * S,
+ uint32_t fftLenReal,
+ uint32_t ifftFlagR,
+ uint32_t bitReverseFlag)
+{
+ /* Initialise the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+
+ /* Initialize the Real FFT length */
+ S->fftLenReal = (uint16_t) fftLenReal;
+
+ /* Initialize the Twiddle coefficientA pointer */
+ S->pTwiddleAReal = (q15_t *) realCoefAQ15;
+
+ /* Initialize the Twiddle coefficientB pointer */
+ S->pTwiddleBReal = (q15_t *) realCoefBQ15;
+
+ /* Initialize the Flag for selection of RFFT or RIFFT */
+ S->ifftFlagR = (uint8_t) ifftFlagR;
+
+ /* Initialize the Flag for calculation Bit reversal or not */
+ S->bitReverseFlagR = (uint8_t) bitReverseFlag;
+
+ /* Initialization of coef modifier depending on the FFT length */
+ switch (S->fftLenReal)
+ {
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
+ case 8192U:
+ S->twidCoefRModifier = 1U;
+ S->pCfft = &arm_cfft_sR_q15_len4096;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
+ case 4096U:
+ S->twidCoefRModifier = 2U;
+ S->pCfft = &arm_cfft_sR_q15_len2048;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
+ case 2048U:
+ S->twidCoefRModifier = 4U;
+ S->pCfft = &arm_cfft_sR_q15_len1024;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
+ case 1024U:
+ S->twidCoefRModifier = 8U;
+ S->pCfft = &arm_cfft_sR_q15_len512;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
+ case 512U:
+ S->twidCoefRModifier = 16U;
+ S->pCfft = &arm_cfft_sR_q15_len256;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
+ case 256U:
+ S->twidCoefRModifier = 32U;
+ S->pCfft = &arm_cfft_sR_q15_len128;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
+ case 128U:
+ S->twidCoefRModifier = 64U;
+ S->pCfft = &arm_cfft_sR_q15_len64;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
+ case 64U:
+ S->twidCoefRModifier = 128U;
+ S->pCfft = &arm_cfft_sR_q15_len32;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
+ case 32U:
+ S->twidCoefRModifier = 256U;
+ S->pCfft = &arm_cfft_sR_q15_len16;
+ break;
+#endif
+ default:
+ /* Reporting argument error if rfftSize is not valid value */
+ status = ARM_MATH_ARGUMENT_ERROR;
+ break;
+ }
+
+ /* return the status of RFFT Init function */
+ return (status);
+}
+
+/**
+ @} end of RealFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q31.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q31.c index 0a28719..efae1ea 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q31.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_init_q31.c @@ -1,246 +1,160 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rfft_init_q31.c - * Description: RFFT & RIFFT Q31 initialisation function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" -#include "arm_common_tables.h" -#include "arm_const_structs.h" - - - -/** - @addtogroup RealFFT - @{ - */ - -/** - @brief Initialization function for the Q31 RFFT/RIFFT. - @param[in,out] S points to an instance of the Q31 RFFT/RIFFT structure - @param[in] fftLenReal length of the FFT - @param[in] ifftFlagR flag that selects transform direction - - value = 0: forward transform - - value = 1: inverse transform - @param[in] bitReverseFlag flag that enables / disables bit reversal of output - - value = 0: disables bit reversal of output - - value = 1: enables bit reversal of output - @return execution status - - \ref ARM_MATH_SUCCESS : Operation successful - - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLenReal</code> is not a supported length - - @par Details - The parameter <code>fftLenReal</code> specifies length of RFFT/RIFFT Process. - Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192. - @par - The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed. - Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated. - @par - The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order. - Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order. - @par - This function also initializes Twiddle factor table. -*/ - -arm_status arm_rfft_init_q31( - arm_rfft_instance_q31 * S, - uint32_t fftLenReal, - uint32_t ifftFlagR, - uint32_t bitReverseFlag) -{ - /* Initialise the default arm status */ - arm_status status = ARM_MATH_ARGUMENT_ERROR; - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES) - -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_REALCOEF_Q31) - - /* Initialise the default arm status */ - status = ARM_MATH_SUCCESS; - - /* Initialize the Real FFT length */ - S->fftLenReal = (uint16_t) fftLenReal; - - /* Initialize the Twiddle coefficientA pointer */ - S->pTwiddleAReal = (q31_t *) realCoefAQ31; - - /* Initialize the Twiddle coefficientB pointer */ - S->pTwiddleBReal = (q31_t *) realCoefBQ31; - - /* Initialize the Flag for selection of RFFT or RIFFT */ - S->ifftFlagR = (uint8_t) ifftFlagR; - - /* Initialize the Flag for calculation Bit reversal or not */ - S->bitReverseFlagR = (uint8_t) bitReverseFlag; - - /* Initialization of coef modifier depending on the FFT length */ - switch (S->fftLenReal) - { -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096)) - case 8192U: - - - S->twidCoefRModifier = 1U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q31(&(S->cfftInst),4096); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q31_len4096; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048)) - case 4096U: - S->twidCoefRModifier = 2U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q31(&(S->cfftInst),2048); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q31_len2048; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024)) - case 2048U: - S->twidCoefRModifier = 4U; - - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q31(&(S->cfftInst),1024); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q31_len1024; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FXT_512)) - case 1024U: - S->twidCoefRModifier = 8U; - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q31(&(S->cfftInst),512); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q31_len512; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FXT_256)) - case 512U: - S->twidCoefRModifier = 16U; - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q31(&(S->cfftInst),256); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q31_len256; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FXT_128)) - case 256U: - S->twidCoefRModifier = 32U; - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q31(&(S->cfftInst),128); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q31_len128; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FXT_64)) - case 128U: - S->twidCoefRModifier = 64U; - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q31(&(S->cfftInst),64); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q31_len64; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FXT_32)) - case 64U: - S->twidCoefRModifier = 128U; - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q31(&(S->cfftInst),32); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q31_len32; - #endif - break; -#endif -#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FXT_16)) - case 32U: - S->twidCoefRModifier = 256U; - #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - status=arm_cfft_init_q31(&(S->cfftInst),16); - if (status != ARM_MATH_SUCCESS) - { - return(status); - } - #else - S->pCfft = &arm_cfft_sR_q31_len16; - #endif - break; -#endif - default: - /* Reporting argument error if rfftSize is not valid value */ - status = ARM_MATH_ARGUMENT_ERROR; - break; - } - -#endif -#endif - /* return the status of RFFT Init function */ - return (status); -} - -/** - @} end of RealFFT group - */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rfft_init_q31.c
+ * Description: RFFT & RIFFT Q31 initialisation function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+#include "arm_const_structs.h"
+
+
+
+/**
+ @addtogroup RealFFT
+ @{
+ */
+
+/**
+ @brief Initialization function for the Q31 RFFT/RIFFT.
+ @param[in,out] S points to an instance of the Q31 RFFT/RIFFT structure
+ @param[in] fftLenReal length of the FFT
+ @param[in] ifftFlagR flag that selects transform direction
+ - value = 0: forward transform
+ - value = 1: inverse transform
+ @param[in] bitReverseFlag flag that enables / disables bit reversal of output
+ - value = 0: disables bit reversal of output
+ - value = 1: enables bit reversal of output
+ @return execution status
+ - \ref ARM_MATH_SUCCESS : Operation successful
+ - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLenReal</code> is not a supported length
+
+ @par Details
+ The parameter <code>fftLenReal</code> specifies length of RFFT/RIFFT Process.
+ Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192.
+ @par
+ The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed.
+ Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated.
+ @par
+ The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+ Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+ @par
+ This function also initializes Twiddle factor table.
+*/
+
+arm_status arm_rfft_init_q31(
+ arm_rfft_instance_q31 * S,
+ uint32_t fftLenReal,
+ uint32_t ifftFlagR,
+ uint32_t bitReverseFlag)
+{
+ /* Initialise the default arm status */
+ arm_status status = ARM_MATH_SUCCESS;
+
+ /* Initialize the Real FFT length */
+ S->fftLenReal = (uint16_t) fftLenReal;
+
+ /* Initialize the Twiddle coefficientA pointer */
+ S->pTwiddleAReal = (q31_t *) realCoefAQ31;
+
+ /* Initialize the Twiddle coefficientB pointer */
+ S->pTwiddleBReal = (q31_t *) realCoefBQ31;
+
+ /* Initialize the Flag for selection of RFFT or RIFFT */
+ S->ifftFlagR = (uint8_t) ifftFlagR;
+
+ /* Initialize the Flag for calculation Bit reversal or not */
+ S->bitReverseFlagR = (uint8_t) bitReverseFlag;
+
+ /* Initialization of coef modifier depending on the FFT length */
+ switch (S->fftLenReal)
+ {
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
+ case 8192U:
+ S->twidCoefRModifier = 1U;
+ S->pCfft = &arm_cfft_sR_q31_len4096;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
+ case 4096U:
+ S->twidCoefRModifier = 2U;
+ S->pCfft = &arm_cfft_sR_q31_len2048;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
+ case 2048U:
+ S->twidCoefRModifier = 4U;
+ S->pCfft = &arm_cfft_sR_q31_len1024;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
+ case 1024U:
+ S->twidCoefRModifier = 8U;
+ S->pCfft = &arm_cfft_sR_q31_len512;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
+ case 512U:
+ S->twidCoefRModifier = 16U;
+ S->pCfft = &arm_cfft_sR_q31_len256;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
+ case 256U:
+ S->twidCoefRModifier = 32U;
+ S->pCfft = &arm_cfft_sR_q31_len128;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
+ case 128U:
+ S->twidCoefRModifier = 64U;
+ S->pCfft = &arm_cfft_sR_q31_len64;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
+ case 64U:
+ S->twidCoefRModifier = 128U;
+ S->pCfft = &arm_cfft_sR_q31_len32;
+ break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
+ case 32U:
+ S->twidCoefRModifier = 256U;
+ S->pCfft = &arm_cfft_sR_q31_len16;
+ break;
+#endif
+ default:
+ /* Reporting argument error if rfftSize is not valid value */
+ status = ARM_MATH_ARGUMENT_ERROR;
+ break;
+ }
+
+ /* return the status of RFFT Init function */
+ return (status);
+}
+
+/**
+ @} end of RealFFT group
+ */
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q15.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q15.c index 7d149c6..29d12b7 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q15.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q15.c @@ -1,526 +1,380 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rfft_q15.c - * Description: RFFT & RIFFT Q15 process function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -/* ---------------------------------------------------------------------- - * Internal functions prototypes - * -------------------------------------------------------------------- */ - -void arm_split_rfft_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pATable, - const q15_t * pBTable, - q15_t * pDst, - uint32_t modifier); - -void arm_split_rifft_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pATable, - const q15_t * pBTable, - q15_t * pDst, - uint32_t modifier); - -/** - @addtogroup RealFFT - @{ - */ - -/** - @brief Processing function for the Q15 RFFT/RIFFT. - @param[in] S points to an instance of the Q15 RFFT/RIFFT structure - @param[in] pSrc points to input buffer (Source buffer is modified by this function.) - @param[out] pDst points to output buffer - @return none - - @par Input an output formats - Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. - Hence the output format is different for different RFFT sizes. - The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT: - @par - \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT" - @par - \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT" - @par - If the input buffer is of length N, the output buffer must have length 2*N. - The input buffer is modified by this function. - @par - For the RIFFT, the source buffer must at least have length - fftLenReal + 2. - The last two elements must be equal to what would be generated - by the RFFT: - (pSrc[0] - pSrc[1]) >> 1 and 0 - */ - -void arm_rfft_q15( - const arm_rfft_instance_q15 * S, - q15_t * pSrc, - q15_t * pDst) -{ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - const arm_cfft_instance_q15 *S_CFFT = &(S->cfftInst); -#else - const arm_cfft_instance_q15 *S_CFFT = S->pCfft; -#endif - uint32_t L2 = S->fftLenReal >> 1U; - - /* Calculation of RIFFT of input */ - if (S->ifftFlagR == 1U) - { - /* Real IFFT core process */ - arm_split_rifft_q15 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier); - - /* Complex IFFT process */ - arm_cfft_q15 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR); - - arm_shift_q15(pDst, 1, pDst, S->fftLenReal); - } - else - { - /* Calculation of RFFT of input */ - - /* Complex FFT process */ - arm_cfft_q15 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR); - - /* Real FFT core process */ - arm_split_rfft_q15 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier); - } - -} - -/** - @} end of RealFFT group - */ - -/** - @brief Core Real FFT process - @param[in] pSrc points to input buffer - @param[in] fftLen length of FFT - @param[in] pATable points to twiddle Coef A buffer - @param[in] pBTable points to twiddle Coef B buffer - @param[out] pDst points to output buffer - @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table - @return none - - @par - The function implements a Real FFT - */ - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" -#include "arm_vec_fft.h" - -#if defined(__CMSIS_GCC_H) -#define MVE_CMPLX_MULT_FX_AxB_S16(A,B) vqdmladhxq_s16(vqdmlsdhq_s16((__typeof(A))vuninitializedq_s16(), A, B), A, B) -#define MVE_CMPLX_MULT_FX_AxConjB_S16(A,B) vqdmladhq_s16(vqdmlsdhxq_s16((__typeof(A))vuninitializedq_s16(), A, B), A, B) - -#endif - -void arm_split_rfft_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pATable, - const q15_t * pBTable, - q15_t * pDst, - uint32_t modifier) -{ - uint32_t i; /* Loop Counter */ - const q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */ - q15_t *pOut1 = &pDst[2]; - q15_t *pIn1 = &pSrc[2]; - uint16x8_t offsetIn = { 6, 7, 4, 5, 2, 3, 0, 1 }; - uint16x8_t offsetCoef; - const uint16_t offsetCoefArr[16] = { - 0, 0, 2, 2, 4, 4, 6, 6, - 0, 1, 0, 1, 0, 1, 0, 1 - }; - - offsetCoef = vmulq_n_u16(vld1q_u16(offsetCoefArr), modifier) + vld1q_u16(offsetCoefArr + 8); - offsetIn = vaddq_n_u16(offsetIn, (2 * fftLen - 8)); - - /* Init coefficient pointers */ - pCoefA = &pATable[modifier * 2]; - pCoefB = &pBTable[modifier * 2]; - - const q15_t *pCoefAb, *pCoefBb; - pCoefAb = pCoefA; - pCoefBb = pCoefB; - - pIn1 = &pSrc[2]; - - i = fftLen - 1U; - i = i / 4 + 1; - while (i > 0U) { - q15x8_t in1 = vld1q_s16(pIn1); - q15x8_t in2 = vldrhq_gather_shifted_offset_s16(pSrc, offsetIn); - q15x8_t coefA = vldrhq_gather_shifted_offset_s16(pCoefAb, offsetCoef); - q15x8_t coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef); - -#if defined(__CMSIS_GCC_H) - q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB_S16(in1, coefA), - MVE_CMPLX_MULT_FX_AxConjB_S16(coefB, in2)); -#else - q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q15x8_t), - MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q15x8_t)); -#endif - vst1q_s16(pOut1, out); - pOut1 += 8; - - offsetCoef = vaddq_n_u16(offsetCoef, modifier * 8); - offsetIn -= 8; - pIn1 += 8; - i -= 1; - } - - pDst[2 * fftLen] = (pSrc[0] - pSrc[1]) >> 1U; - pDst[2 * fftLen + 1] = 0; - - pDst[0] = (pSrc[0] + pSrc[1]) >> 1U; - pDst[1] = 0; -} -#else -void arm_split_rfft_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pATable, - const q15_t * pBTable, - q15_t * pDst, - uint32_t modifier) -{ - uint32_t i; /* Loop Counter */ - q31_t outR, outI; /* Temporary variables for output */ - const q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */ - q15_t *pSrc1, *pSrc2; -#if defined (ARM_MATH_DSP) - q15_t *pD1, *pD2; -#endif - - /* Init coefficient pointers */ - pCoefA = &pATable[modifier * 2]; - pCoefB = &pBTable[modifier * 2]; - - pSrc1 = &pSrc[2]; - pSrc2 = &pSrc[(2U * fftLen) - 2U]; - -#if defined (ARM_MATH_DSP) - - i = 1U; - pD1 = pDst + 2; - pD2 = pDst + (4U * fftLen) - 2; - - for (i = fftLen - 1; i > 0; i--) - { - /* - outR = ( pSrc[2 * i] * pATable[2 * i] - - pSrc[2 * i + 1] * pATable[2 * i + 1] - + pSrc[2 * n - 2 * i] * pBTable[2 * i] - + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]); - - outI = ( pIn[2 * i + 1] * pATable[2 * i] - + pIn[2 * i] * pATable[2 * i + 1] - + pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]) - */ - - -#ifndef ARM_MATH_BIG_ENDIAN - /* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */ - outR = __SMUSD(read_q15x2 (pSrc1), read_q15x2((q15_t *) pCoefA)); -#else - /* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */ - outR = -(__SMUSD(read_q15x2 (pSrc1), read_q15x2((q15_t *) pCoefA))); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* pSrc[2 * n - 2 * i] * pBTable[2 * i] + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */ - outR = __SMLAD(read_q15x2 (pSrc2), read_q15x2((q15_t *) pCoefB), outR) >> 16U; - - /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */ -#ifndef ARM_MATH_BIG_ENDIAN - outI = __SMUSDX(read_q15x2_da (&pSrc2), read_q15x2((q15_t *) pCoefB)); -#else - outI = __SMUSDX(read_q15x2 ((q15_t *) pCoefB), read_q15x2_da (&pSrc2)); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */ - outI = __SMLADX(read_q15x2_ia (&pSrc1), read_q15x2 ((q15_t *) pCoefA), outI); - - /* write output */ - *pD1++ = (q15_t) outR; - *pD1++ = outI >> 16U; - - /* write complex conjugate output */ - pD2[0] = (q15_t) outR; - pD2[1] = -(outI >> 16U); - pD2 -= 2; - - /* update coefficient pointer */ - pCoefB = pCoefB + (2U * modifier); - pCoefA = pCoefA + (2U * modifier); - } - - pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1U; - pDst[2U * fftLen + 1U] = 0; - - pDst[0] = (pSrc[0] + pSrc[1]) >> 1U; - pDst[1] = 0; - -#else - - i = 1U; - - while (i < fftLen) - { - /* - outR = ( pSrc[2 * i] * pATable[2 * i] - - pSrc[2 * i + 1] * pATable[2 * i + 1] - + pSrc[2 * n - 2 * i] * pBTable[2 * i] - + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]); - */ - - outR = *pSrc1 * *pCoefA; - outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1)); - outR = outR + (*pSrc2 * *pCoefB); - outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 16; - - /* - outI = ( pIn[2 * i + 1] * pATable[2 * i] - + pIn[2 * i] * pATable[2 * i + 1] - + pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); - */ - - outI = *pSrc2 * *(pCoefB + 1); - outI = outI - (*(pSrc2 + 1) * *pCoefB); - outI = outI + (*(pSrc1 + 1) * *pCoefA); - outI = outI + (*pSrc1 * *(pCoefA + 1)); - - /* update input pointers */ - pSrc1 += 2U; - pSrc2 -= 2U; - - /* write output */ - pDst[2U * i] = (q15_t) outR; - pDst[2U * i + 1U] = outI >> 16U; - - /* write complex conjugate output */ - pDst[(4U * fftLen) - (2U * i)] = (q15_t) outR; - pDst[((4U * fftLen) - (2U * i)) + 1U] = -(outI >> 16U); - - /* update coefficient pointer */ - pCoefB = pCoefB + (2U * modifier); - pCoefA = pCoefA + (2U * modifier); - - i++; - } - - pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1; - pDst[2U * fftLen + 1U] = 0; - - pDst[0] = (pSrc[0] + pSrc[1]) >> 1; - pDst[1] = 0; - -#endif /* #if defined (ARM_MATH_DSP) */ -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @brief Core Real IFFT process - @param[in] pSrc points to input buffer - @param[in] fftLen length of FFT - @param[in] pATable points to twiddle Coef A buffer - @param[in] pBTable points to twiddle Coef B buffer - @param[out] pDst points to output buffer - @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table - @return none - - @par - The function implements a Real IFFT - */ - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" -#include "arm_vec_fft.h" - -void arm_split_rifft_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pATable, - const q15_t * pBTable, - q15_t * pDst, - uint32_t modifier) -{ - uint32_t i; /* Loop Counter */ - const q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */ - q15_t *pIn1; - uint16x8_t offset = { 6, 7, 4, 5, 2, 3, 0, 1 }; - uint16x8_t offsetCoef; - int16x8_t conj = { 1, -1, 1, -1, 1, -1, 1, -1 }; /* conjugate */ - const uint16_t offsetCoefArr[16] = { - 0, 0, 2, 2, 4, 4, 6, 6, - 0, 1, 0, 1, 0, 1, 0, 1 - }; - - offsetCoef = vmulq_n_u16(vld1q_u16(offsetCoefArr), modifier) + vld1q_u16(offsetCoefArr + 8); - - offset = vaddq_n_u16(offset, (2 * fftLen - 6)); - - /* Init coefficient pointers */ - pCoefA = &pATable[0]; - pCoefB = &pBTable[0]; - - const q15_t *pCoefAb, *pCoefBb; - pCoefAb = pCoefA; - pCoefBb = pCoefB; - - pIn1 = &pSrc[0]; - - i = fftLen; - i = i / 4; - - while (i > 0U) { - q15x8_t in1 = vld1q_s16(pIn1); - q15x8_t in2 = vldrhq_gather_shifted_offset_s16(pSrc, offset); - q15x8_t coefA = vldrhq_gather_shifted_offset_s16(pCoefAb, offsetCoef); - q15x8_t coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef); - - /* can we avoid the conjugate here ? */ - q15x8_t out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA, q15x8_t), - vmulq(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB, q15x8_t))); - - vst1q_s16(pDst, out); - pDst += 8; - - offsetCoef = vaddq_n_u16(offsetCoef, modifier * 8); - offset -= 8; - - pIn1 += 8; - i -= 1; - } -} -#else -void arm_split_rifft_q15( - q15_t * pSrc, - uint32_t fftLen, - const q15_t * pATable, - const q15_t * pBTable, - q15_t * pDst, - uint32_t modifier) -{ - uint32_t i; /* Loop Counter */ - q31_t outR, outI; /* Temporary variables for output */ - const q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */ - q15_t *pSrc1, *pSrc2; - q15_t *pDst1 = &pDst[0]; - - pCoefA = &pATable[0]; - pCoefB = &pBTable[0]; - - pSrc1 = &pSrc[0]; - pSrc2 = &pSrc[2 * fftLen]; - - i = fftLen; - while (i > 0U) - { - /* - outR = ( pIn[2 * i] * pATable[2 * i] - + pIn[2 * i + 1] * pATable[2 * i + 1] - + pIn[2 * n - 2 * i] * pBTable[2 * i] - - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]); - - outI = ( pIn[2 * i + 1] * pATable[2 * i] - - pIn[2 * i] * pATable[2 * i + 1] - - pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); - */ - -#if defined (ARM_MATH_DSP) - -#ifndef ARM_MATH_BIG_ENDIAN - /* pIn[2 * n - 2 * i] * pBTable[2 * i] - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */ - outR = __SMUSD(read_q15x2(pSrc2), read_q15x2((q15_t *) pCoefB)); -#else - /* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] + pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */ - outR = -(__SMUSD(read_q15x2(pSrc2), read_q15x2((q15_t *) pCoefB))); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] + pIn[2 * n - 2 * i] * pBTable[2 * i] */ - outR = __SMLAD(read_q15x2(pSrc1), read_q15x2 ((q15_t *) pCoefA), outR) >> 16U; - - /* -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] + pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */ - outI = __SMUADX(read_q15x2_da (&pSrc2), read_q15x2((q15_t *) pCoefB)); - - /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */ -#ifndef ARM_MATH_BIG_ENDIAN - outI = __SMLSDX(read_q15x2 ((q15_t *) pCoefA), read_q15x2_ia (&pSrc1), -outI); -#else - outI = __SMLSDX(read_q15x2_ia (&pSrc1), read_q15x2 ((q15_t *) pCoefA), -outI); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - /* write output */ -#ifndef ARM_MATH_BIG_ENDIAN - write_q15x2_ia (&pDst1, __PKHBT(outR, (outI >> 16U), 16)); -#else - write_q15x2_ia (&pDst1, __PKHBT((outI >> 16U), outR, 16)); -#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ - - -#else /* #if defined (ARM_MATH_DSP) */ - - outR = *pSrc2 * *pCoefB; - outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1)); - outR = outR + (*pSrc1 * *pCoefA); - outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 16; - - outI = *(pSrc1 + 1) * *pCoefA; - outI = outI - (*pSrc1 * *(pCoefA + 1)); - outI = outI - (*pSrc2 * *(pCoefB + 1)); - outI = outI - (*(pSrc2 + 1) * *(pCoefB)); - - /* update input pointers */ - pSrc1 += 2U; - pSrc2 -= 2U; - - /* write output */ - *pDst1++ = (q15_t) outR; - *pDst1++ = (q15_t) (outI >> 16); - -#endif /* #if defined (ARM_MATH_DSP) */ - - /* update coefficient pointer */ - pCoefB = pCoefB + (2 * modifier); - pCoefA = pCoefA + (2 * modifier); - - i--; - } - -} -#endif /* defined(ARM_MATH_MVEI) */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rfft_q15.c
+ * Description: RFFT & RIFFT Q15 process function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/* ----------------------------------------------------------------------
+ * Internal functions prototypes
+ * -------------------------------------------------------------------- */
+
+void arm_split_rfft_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pATable,
+ const q15_t * pBTable,
+ q15_t * pDst,
+ uint32_t modifier);
+
+void arm_split_rifft_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pATable,
+ const q15_t * pBTable,
+ q15_t * pDst,
+ uint32_t modifier);
+
+/**
+ @addtogroup RealFFT
+ @{
+ */
+
+/**
+ @brief Processing function for the Q15 RFFT/RIFFT.
+ @param[in] S points to an instance of the Q15 RFFT/RIFFT structure
+ @param[in] pSrc points to input buffer
+ @param[out] pDst points to output buffer
+ @return none
+
+ @par Input an output formats
+ Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
+ Hence the output format is different for different RFFT sizes.
+ The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
+ @par
+ \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
+ @par
+ \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
+ */
+
+void arm_rfft_q15(
+ const arm_rfft_instance_q15 * S,
+ q15_t * pSrc,
+ q15_t * pDst)
+{
+ const arm_cfft_instance_q15 *S_CFFT = S->pCfft;
+ uint32_t L2 = S->fftLenReal >> 1U;
+ uint32_t i;
+
+ /* Calculation of RIFFT of input */
+ if (S->ifftFlagR == 1U)
+ {
+ /* Real IFFT core process */
+ arm_split_rifft_q15 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+
+ /* Complex IFFT process */
+ arm_cfft_q15 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
+
+ for(i = 0; i < S->fftLenReal; i++)
+ {
+ pDst[i] = pDst[i] << 1U;
+ }
+ }
+ else
+ {
+ /* Calculation of RFFT of input */
+
+ /* Complex FFT process */
+ arm_cfft_q15 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
+
+ /* Real FFT core process */
+ arm_split_rfft_q15 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+ }
+
+}
+
+/**
+ @} end of RealFFT group
+ */
+
+/**
+ @brief Core Real FFT process
+ @param[in] pSrc points to input buffer
+ @param[in] fftLen length of FFT
+ @param[in] pATable points to twiddle Coef A buffer
+ @param[in] pBTable points to twiddle Coef B buffer
+ @param[out] pDst points to output buffer
+ @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+ @return none
+
+ @par
+ The function implements a Real FFT
+ */
+
+void arm_split_rfft_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pATable,
+ const q15_t * pBTable,
+ q15_t * pDst,
+ uint32_t modifier)
+{
+ uint32_t i; /* Loop Counter */
+ q31_t outR, outI; /* Temporary variables for output */
+ const q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
+ q15_t *pSrc1, *pSrc2;
+#if defined (ARM_MATH_DSP)
+ q15_t *pD1, *pD2;
+#endif
+
+ /* Init coefficient pointers */
+ pCoefA = &pATable[modifier * 2];
+ pCoefB = &pBTable[modifier * 2];
+
+ pSrc1 = &pSrc[2];
+ pSrc2 = &pSrc[(2U * fftLen) - 2U];
+
+#if defined (ARM_MATH_DSP)
+
+ i = 1U;
+ pD1 = pDst + 2;
+ pD2 = pDst + (4U * fftLen) - 2;
+
+ for (i = fftLen - 1; i > 0; i--)
+ {
+ /*
+ outR = ( pSrc[2 * i] * pATable[2 * i]
+ - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ + pSrc[2 * n - 2 * i] * pBTable[2 * i]
+ + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+
+ outI = ( pIn[2 * i + 1] * pATable[2 * i]
+ + pIn[2 * i] * pATable[2 * i + 1]
+ + pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
+ - pIn[2 * n - 2 * i + 1] * pBTable[2 * i])
+ */
+
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
+ outR = __SMUSD(read_q15x2 (pSrc1), read_q15x2((q15_t *) pCoefA));
+#else
+ /* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
+ outR = -(__SMUSD(read_q15x2 (pSrc1), read_q15x2((q15_t *) pCoefA)));
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* pSrc[2 * n - 2 * i] * pBTable[2 * i] + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
+ outR = __SMLAD(read_q15x2 (pSrc2), read_q15x2((q15_t *) pCoefB), outR) >> 16U;
+
+ /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+#ifndef ARM_MATH_BIG_ENDIAN
+ outI = __SMUSDX(read_q15x2_da (&pSrc2), read_q15x2((q15_t *) pCoefB));
+#else
+ outI = __SMUSDX(read_q15x2 ((q15_t *) pCoefB), read_q15x2_da (&pSrc2));
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
+ outI = __SMLADX(read_q15x2_ia (&pSrc1), read_q15x2 ((q15_t *) pCoefA), outI);
+
+ /* write output */
+ *pD1++ = (q15_t) outR;
+ *pD1++ = outI >> 16U;
+
+ /* write complex conjugate output */
+ pD2[0] = (q15_t) outR;
+ pD2[1] = -(outI >> 16U);
+ pD2 -= 2;
+
+ /* update coefficient pointer */
+ pCoefB = pCoefB + (2U * modifier);
+ pCoefA = pCoefA + (2U * modifier);
+ }
+
+ pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1U;
+ pDst[2U * fftLen + 1U] = 0;
+
+ pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
+ pDst[1] = 0;
+
+#else
+
+ i = 1U;
+
+ while (i < fftLen)
+ {
+ /*
+ outR = ( pSrc[2 * i] * pATable[2 * i]
+ - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ + pSrc[2 * n - 2 * i] * pBTable[2 * i]
+ + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+ */
+
+ outR = *pSrc1 * *pCoefA;
+ outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1));
+ outR = outR + (*pSrc2 * *pCoefB);
+ outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 16;
+
+ /*
+ outI = ( pIn[2 * i + 1] * pATable[2 * i]
+ + pIn[2 * i] * pATable[2 * i + 1]
+ + pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
+ - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+ */
+
+ outI = *pSrc2 * *(pCoefB + 1);
+ outI = outI - (*(pSrc2 + 1) * *pCoefB);
+ outI = outI + (*(pSrc1 + 1) * *pCoefA);
+ outI = outI + (*pSrc1 * *(pCoefA + 1));
+
+ /* update input pointers */
+ pSrc1 += 2U;
+ pSrc2 -= 2U;
+
+ /* write output */
+ pDst[2U * i] = (q15_t) outR;
+ pDst[2U * i + 1U] = outI >> 16U;
+
+ /* write complex conjugate output */
+ pDst[(4U * fftLen) - (2U * i)] = (q15_t) outR;
+ pDst[((4U * fftLen) - (2U * i)) + 1U] = -(outI >> 16U);
+
+ /* update coefficient pointer */
+ pCoefB = pCoefB + (2U * modifier);
+ pCoefA = pCoefA + (2U * modifier);
+
+ i++;
+ }
+
+ pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
+ pDst[2U * fftLen + 1U] = 0;
+
+ pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
+ pDst[1] = 0;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+}
+
+
+/**
+ @brief Core Real IFFT process
+ @param[in] pSrc points to input buffer
+ @param[in] fftLen length of FFT
+ @param[in] pATable points to twiddle Coef A buffer
+ @param[in] pBTable points to twiddle Coef B buffer
+ @param[out] pDst points to output buffer
+ @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+ @return none
+
+ @par
+ The function implements a Real IFFT
+ */
+
+void arm_split_rifft_q15(
+ q15_t * pSrc,
+ uint32_t fftLen,
+ const q15_t * pATable,
+ const q15_t * pBTable,
+ q15_t * pDst,
+ uint32_t modifier)
+{
+ uint32_t i; /* Loop Counter */
+ q31_t outR, outI; /* Temporary variables for output */
+ const q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
+ q15_t *pSrc1, *pSrc2;
+ q15_t *pDst1 = &pDst[0];
+
+ pCoefA = &pATable[0];
+ pCoefB = &pBTable[0];
+
+ pSrc1 = &pSrc[0];
+ pSrc2 = &pSrc[2 * fftLen];
+
+ i = fftLen;
+ while (i > 0U)
+ {
+ /*
+ outR = ( pIn[2 * i] * pATable[2 * i]
+ + pIn[2 * i + 1] * pATable[2 * i + 1]
+ + pIn[2 * n - 2 * i] * pBTable[2 * i]
+ - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+
+ outI = ( pIn[2 * i + 1] * pATable[2 * i]
+ - pIn[2 * i] * pATable[2 * i + 1]
+ - pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
+ - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+ */
+
+#if defined (ARM_MATH_DSP)
+
+#ifndef ARM_MATH_BIG_ENDIAN
+ /* pIn[2 * n - 2 * i] * pBTable[2 * i] - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
+ outR = __SMUSD(read_q15x2(pSrc2), read_q15x2((q15_t *) pCoefB));
+#else
+ /* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] + pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
+ outR = -(__SMUSD(read_q15x2(pSrc2), read_q15x2((q15_t *) pCoefB)));
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] + pIn[2 * n - 2 * i] * pBTable[2 * i] */
+ outR = __SMLAD(read_q15x2(pSrc1), read_q15x2 ((q15_t *) pCoefA), outR) >> 16U;
+
+ /* -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] + pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+ outI = __SMUADX(read_q15x2_da (&pSrc2), read_q15x2((q15_t *) pCoefB));
+
+ /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
+#ifndef ARM_MATH_BIG_ENDIAN
+ outI = __SMLSDX(read_q15x2 ((q15_t *) pCoefA), read_q15x2_ia (&pSrc1), -outI);
+#else
+ outI = __SMLSDX(read_q15x2_ia (&pSrc1), read_q15x2 ((q15_t *) pCoefA), -outI);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* write output */
+#ifndef ARM_MATH_BIG_ENDIAN
+ write_q15x2_ia (&pDst1, __PKHBT(outR, (outI >> 16U), 16));
+#else
+ write_q15x2_ia (&pDst1, __PKHBT((outI >> 16U), outR, 16));
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+
+#else /* #if defined (ARM_MATH_DSP) */
+
+ outR = *pSrc2 * *pCoefB;
+ outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
+ outR = outR + (*pSrc1 * *pCoefA);
+ outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 16;
+
+ outI = *(pSrc1 + 1) * *pCoefA;
+ outI = outI - (*pSrc1 * *(pCoefA + 1));
+ outI = outI - (*pSrc2 * *(pCoefB + 1));
+ outI = outI - (*(pSrc2 + 1) * *(pCoefB));
+
+ /* update input pointers */
+ pSrc1 += 2U;
+ pSrc2 -= 2U;
+
+ /* write output */
+ *pDst1++ = (q15_t) outR;
+ *pDst1++ = (q15_t) (outI >> 16);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* update coefficient pointer */
+ pCoefB = pCoefB + (2 * modifier);
+ pCoefA = pCoefA + (2 * modifier);
+
+ i--;
+ }
+
+}
diff --git a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q31.c b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q31.c index ad3212d..0cc595b 100644 --- a/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q31.c +++ b/Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_q31.c @@ -1,430 +1,292 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS DSP Library - * Title: arm_rfft_q31.c - * Description: FFT & RIFFT Q31 process function - * - * $Date: 23 April 2021 - * $Revision: V1.9.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dsp/transform_functions.h" - -/* ---------------------------------------------------------------------- - * Internal functions prototypes - * -------------------------------------------------------------------- */ - -void arm_split_rfft_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pATable, - const q31_t * pBTable, - q31_t * pDst, - uint32_t modifier); - -void arm_split_rifft_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pATable, - const q31_t * pBTable, - q31_t * pDst, - uint32_t modifier); - -/** - @addtogroup RealFFT - @{ - */ - -/** - @brief Processing function for the Q31 RFFT/RIFFT. - @param[in] S points to an instance of the Q31 RFFT/RIFFT structure - @param[in] pSrc points to input buffer (Source buffer is modified by this function) - @param[out] pDst points to output buffer - @return none - - @par Input an output formats - Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. - Hence the output format is different for different RFFT sizes. - The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT: - @par - \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT" - @par - \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT" - @par - If the input buffer is of length N, the output buffer must have length 2*N. - The input buffer is modified by this function. - @par - For the RIFFT, the source buffer must at least have length - fftLenReal + 2. - The last two elements must be equal to what would be generated - by the RFFT: - (pSrc[0] - pSrc[1]) >> 1 and 0 - - */ - -void arm_rfft_q31( - const arm_rfft_instance_q31 * S, - q31_t * pSrc, - q31_t * pDst) -{ -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - const arm_cfft_instance_q31 *S_CFFT = &(S->cfftInst); -#else - const arm_cfft_instance_q31 *S_CFFT = S->pCfft; -#endif - uint32_t L2 = S->fftLenReal >> 1U; - - /* Calculation of RIFFT of input */ - if (S->ifftFlagR == 1U) - { - /* Real IFFT core process */ - arm_split_rifft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier); - - /* Complex IFFT process */ - arm_cfft_q31 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR); - - arm_shift_q31(pDst, 1, pDst, S->fftLenReal); - } - else - { - /* Calculation of RFFT of input */ - - /* Complex FFT process */ - arm_cfft_q31 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR); - - /* Real FFT core process */ - arm_split_rfft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier); - } - -} - -/** - @} end of RealFFT group - */ - -/** - @brief Core Real FFT process - @param[in] pSrc points to input buffer - @param[in] fftLen length of FFT - @param[in] pATable points to twiddle Coef A buffer - @param[in] pBTable points to twiddle Coef B buffer - @param[out] pDst points to output buffer - @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table - @return none - */ - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -#include "arm_helium_utils.h" -#include "arm_vec_fft.h" - -#if defined(__CMSIS_GCC_H) - -#define MVE_CMPLX_MULT_FX_AxB_S32(A,B) vqdmladhxq_s32(vqdmlsdhq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B) -#define MVE_CMPLX_MULT_FX_AxConjB_S32(A,B) vqdmladhq_s32(vqdmlsdhxq_s32((__typeof(A))vuninitializedq_s32(), A, B), A, B) - -#endif - -void arm_split_rfft_q31( - q31_t *pSrc, - uint32_t fftLen, - const q31_t *pATable, - const q31_t *pBTable, - q31_t *pDst, - uint32_t modifier) -{ - uint32_t i; /* Loop Counter */ - const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */ - q31_t *pOut1 = &pDst[2]; - q31_t *pIn1 = &pSrc[2]; - uint32x4_t offset = { 2, 3, 0, 1 }; - uint32x4_t offsetCoef = { 0, 1, modifier * 2, modifier * 2 + 1 }; - - offset = offset + (2 * fftLen - 4); - - - /* Init coefficient pointers */ - pCoefA = &pATable[modifier * 2]; - pCoefB = &pBTable[modifier * 2]; - - const q31_t *pCoefAb, *pCoefBb; - pCoefAb = pCoefA; - pCoefBb = pCoefB; - - pIn1 = &pSrc[2]; - - i = fftLen - 1U; - i = i / 2 + 1; - while (i > 0U) { - q31x4_t in1 = vld1q_s32(pIn1); - q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset); - q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef); - q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef); -#if defined(__CMSIS_GCC_H) - q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB_S32(in1, coefA),MVE_CMPLX_MULT_FX_AxConjB_S32(coefB, in2)); -#else - q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q31x4_t), - MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q31x4_t)); -#endif - vst1q(pOut1, out); - pOut1 += 4; - - offsetCoef += modifier * 4; - offset -= 4; - - pIn1 += 4; - i -= 1; - } - - pDst[2 * fftLen] = (pSrc[0] - pSrc[1]) >> 1U; - pDst[2 * fftLen + 1] = 0; - - pDst[0] = (pSrc[0] + pSrc[1]) >> 1U; - pDst[1] = 0; -} -#else -void arm_split_rfft_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pATable, - const q31_t * pBTable, - q31_t * pDst, - uint32_t modifier) -{ - uint32_t i; /* Loop Counter */ - q31_t outR, outI; /* Temporary variables for output */ - const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */ - q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */ - q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[4 * fftLen - 1]; - q31_t *pIn1 = &pSrc[2], *pIn2 = &pSrc[2 * fftLen - 1]; - - /* Init coefficient pointers */ - pCoefA = &pATable[modifier * 2]; - pCoefB = &pBTable[modifier * 2]; - - i = fftLen - 1U; - - while (i > 0U) - { - /* - outR = ( pSrc[2 * i] * pATable[2 * i] - - pSrc[2 * i + 1] * pATable[2 * i + 1] - + pSrc[2 * n - 2 * i] * pBTable[2 * i] - + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]); - - outI = ( pIn[2 * i + 1] * pATable[2 * i] - + pIn[2 * i] * pATable[2 * i + 1] - + pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); - */ - - CoefA1 = *pCoefA++; - CoefA2 = *pCoefA; - - /* outR = (pSrc[2 * i] * pATable[2 * i] */ - mult_32x32_keep32_R (outR, *pIn1, CoefA1); - - /* outI = pIn[2 * i] * pATable[2 * i + 1] */ - mult_32x32_keep32_R (outI, *pIn1++, CoefA2); - - /* - pSrc[2 * i + 1] * pATable[2 * i + 1] */ - multSub_32x32_keep32_R (outR, *pIn1, CoefA2); - - /* (pIn[2 * i + 1] * pATable[2 * i] */ - multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1); - - /* pSrc[2 * n - 2 * i] * pBTable[2 * i] */ - multSub_32x32_keep32_R (outR, *pIn2, CoefA2); - CoefB1 = *pCoefB; - - /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */ - multSub_32x32_keep32_R (outI, *pIn2--, CoefB1); - - /* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */ - multAcc_32x32_keep32_R (outR, *pIn2, CoefB1); - - /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */ - multSub_32x32_keep32_R (outI, *pIn2--, CoefA2); - - /* write output */ - *pOut1++ = outR; - *pOut1++ = outI; - - /* write complex conjugate output */ - *pOut2-- = -outI; - *pOut2-- = outR; - - /* update coefficient pointer */ - pCoefB = pCoefB + (2 * modifier); - pCoefA = pCoefA + (2 * modifier - 1); - - /* Decrement loop count */ - i--; - } - - pDst[2 * fftLen] = (pSrc[0] - pSrc[1]) >> 1U; - pDst[2 * fftLen + 1] = 0; - - pDst[0] = (pSrc[0] + pSrc[1]) >> 1U; - pDst[1] = 0; -} -#endif /* defined(ARM_MATH_MVEI) */ - -/** - @brief Core Real IFFT process - @param[in] pSrc points to input buffer - @param[in] fftLen length of FFT - @param[in] pATable points to twiddle Coef A buffer - @param[in] pBTable points to twiddle Coef B buffer - @param[out] pDst points to output buffer - @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table - @return none - */ - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - -void arm_split_rifft_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pATable, - const q31_t * pBTable, - q31_t * pDst, - uint32_t modifier) -{ - uint32_t i; /* Loop Counter */ - const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */ - q31_t *pIn1; - uint32x4_t offset = { 2, 3, 0, 1 }; - uint32x4_t offsetCoef = { 0, 1, modifier * 2, modifier * 2 + 1 }; - int32x4_t conj = { 1, -1, 1, -1 }; - - offset = offset + (2 * fftLen - 2); - - /* Init coefficient pointers */ - pCoefA = &pATable[0]; - pCoefB = &pBTable[0]; - - const q31_t *pCoefAb, *pCoefBb; - pCoefAb = pCoefA; - pCoefBb = pCoefB; - - pIn1 = &pSrc[0]; - - i = fftLen; - i = i >> 1; - while (i > 0U) { - q31x4_t in1 = vld1q_s32(pIn1); - q31x4_t in2 = vldrwq_gather_shifted_offset_s32(pSrc, offset); - q31x4_t coefA = vldrwq_gather_shifted_offset_s32(pCoefAb, offsetCoef); - q31x4_t coefB = vldrwq_gather_shifted_offset_s32(pCoefBb, offsetCoef); - - /* can we avoid the conjugate here ? */ -#if defined(__CMSIS_GCC_H) - q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB_S32(in1, coefA), - vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB_S32(in2, coefB))); -#else - q31x4_t out = vhaddq_s32(MVE_CMPLX_MULT_FX_AxConjB(in1, coefA, q31x4_t), - vmulq_s32(conj, MVE_CMPLX_MULT_FX_AxB(in2, coefB, q31x4_t))); -#endif - vst1q_s32(pDst, out); - pDst += 4; - - offsetCoef += modifier * 4; - offset -= 4; - - pIn1 += 4; - i -= 1; - } -} -#else -void arm_split_rifft_q31( - q31_t * pSrc, - uint32_t fftLen, - const q31_t * pATable, - const q31_t * pBTable, - q31_t * pDst, - uint32_t modifier) -{ - q31_t outR, outI; /* Temporary variables for output */ - const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */ - q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */ - q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[2 * fftLen + 1]; - - pCoefA = &pATable[0]; - pCoefB = &pBTable[0]; - - while (fftLen > 0U) - { - /* - outR = ( pIn[2 * i] * pATable[2 * i] - + pIn[2 * i + 1] * pATable[2 * i + 1] - + pIn[2 * n - 2 * i] * pBTable[2 * i] - - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]); - - outI = ( pIn[2 * i + 1] * pATable[2 * i] - - pIn[2 * i] * pATable[2 * i + 1] - - pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); - */ - - CoefA1 = *pCoefA++; - CoefA2 = *pCoefA; - - /* outR = (pIn[2 * i] * pATable[2 * i] */ - mult_32x32_keep32_R (outR, *pIn1, CoefA1); - - /* - pIn[2 * i] * pATable[2 * i + 1] */ - mult_32x32_keep32_R (outI, *pIn1++, -CoefA2); - - /* pIn[2 * i + 1] * pATable[2 * i + 1] */ - multAcc_32x32_keep32_R (outR, *pIn1, CoefA2); - - /* pIn[2 * i + 1] * pATable[2 * i] */ - multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1); - - /* pIn[2 * n - 2 * i] * pBTable[2 * i] */ - multAcc_32x32_keep32_R (outR, *pIn2, CoefA2); - CoefB1 = *pCoefB; - - /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */ - multSub_32x32_keep32_R (outI, *pIn2--, CoefB1); - - /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */ - multAcc_32x32_keep32_R (outR, *pIn2, CoefB1); - - /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */ - multAcc_32x32_keep32_R (outI, *pIn2--, CoefA2); - - /* write output */ - *pDst++ = outR; - *pDst++ = outI; - - /* update coefficient pointer */ - pCoefB = pCoefB + (modifier * 2); - pCoefA = pCoefA + (modifier * 2 - 1); - - /* Decrement loop count */ - fftLen--; - } - -} - -#endif /* defined(ARM_MATH_MVEI) */ +/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rfft_q31.c
+ * Description: FFT & RIFFT Q31 process function
+ *
+ * $Date: 18. March 2019
+ * $Revision: V1.6.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/* ----------------------------------------------------------------------
+ * Internal functions prototypes
+ * -------------------------------------------------------------------- */
+
+void arm_split_rfft_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pATable,
+ const q31_t * pBTable,
+ q31_t * pDst,
+ uint32_t modifier);
+
+void arm_split_rifft_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pATable,
+ const q31_t * pBTable,
+ q31_t * pDst,
+ uint32_t modifier);
+
+/**
+ @addtogroup RealFFT
+ @{
+ */
+
+/**
+ @brief Processing function for the Q31 RFFT/RIFFT.
+ @param[in] S points to an instance of the Q31 RFFT/RIFFT structure
+ @param[in] pSrc points to input buffer
+ @param[out] pDst points to output buffer
+ @return none
+
+ @par Input an output formats
+ Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
+ Hence the output format is different for different RFFT sizes.
+ The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
+ @par
+ \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"
+ @par
+ \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"
+ */
+
+void arm_rfft_q31(
+ const arm_rfft_instance_q31 * S,
+ q31_t * pSrc,
+ q31_t * pDst)
+{
+ const arm_cfft_instance_q31 *S_CFFT = S->pCfft;
+ uint32_t L2 = S->fftLenReal >> 1U;
+ uint32_t i;
+
+ /* Calculation of RIFFT of input */
+ if (S->ifftFlagR == 1U)
+ {
+ /* Real IFFT core process */
+ arm_split_rifft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+
+ /* Complex IFFT process */
+ arm_cfft_q31 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
+
+ for(i = 0; i < S->fftLenReal; i++)
+ {
+ pDst[i] = pDst[i] << 1U;
+ }
+ }
+ else
+ {
+ /* Calculation of RFFT of input */
+
+ /* Complex FFT process */
+ arm_cfft_q31 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
+
+ /* Real FFT core process */
+ arm_split_rfft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+ }
+
+}
+
+/**
+ @} end of RealFFT group
+ */
+
+/**
+ @brief Core Real FFT process
+ @param[in] pSrc points to input buffer
+ @param[in] fftLen length of FFT
+ @param[in] pATable points to twiddle Coef A buffer
+ @param[in] pBTable points to twiddle Coef B buffer
+ @param[out] pDst points to output buffer
+ @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+ @return none
+ */
+
+void arm_split_rfft_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pATable,
+ const q31_t * pBTable,
+ q31_t * pDst,
+ uint32_t modifier)
+{
+ uint32_t i; /* Loop Counter */
+ q31_t outR, outI; /* Temporary variables for output */
+ const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
+ q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
+ q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[4 * fftLen - 1];
+ q31_t *pIn1 = &pSrc[2], *pIn2 = &pSrc[2 * fftLen - 1];
+
+ /* Init coefficient pointers */
+ pCoefA = &pATable[modifier * 2];
+ pCoefB = &pBTable[modifier * 2];
+
+ i = fftLen - 1U;
+
+ while (i > 0U)
+ {
+ /*
+ outR = ( pSrc[2 * i] * pATable[2 * i]
+ - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ + pSrc[2 * n - 2 * i] * pBTable[2 * i]
+ + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+
+ outI = ( pIn[2 * i + 1] * pATable[2 * i]
+ + pIn[2 * i] * pATable[2 * i + 1]
+ + pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
+ - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+ */
+
+ CoefA1 = *pCoefA++;
+ CoefA2 = *pCoefA;
+
+ /* outR = (pSrc[2 * i] * pATable[2 * i] */
+ mult_32x32_keep32_R (outR, *pIn1, CoefA1);
+
+ /* outI = pIn[2 * i] * pATable[2 * i + 1] */
+ mult_32x32_keep32_R (outI, *pIn1++, CoefA2);
+
+ /* - pSrc[2 * i + 1] * pATable[2 * i + 1] */
+ multSub_32x32_keep32_R (outR, *pIn1, CoefA2);
+
+ /* (pIn[2 * i + 1] * pATable[2 * i] */
+ multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1);
+
+ /* pSrc[2 * n - 2 * i] * pBTable[2 * i] */
+ multSub_32x32_keep32_R (outR, *pIn2, CoefA2);
+ CoefB1 = *pCoefB;
+
+ /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
+ multSub_32x32_keep32_R (outI, *pIn2--, CoefB1);
+
+ /* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
+ multAcc_32x32_keep32_R (outR, *pIn2, CoefB1);
+
+ /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+ multSub_32x32_keep32_R (outI, *pIn2--, CoefA2);
+
+ /* write output */
+ *pOut1++ = outR;
+ *pOut1++ = outI;
+
+ /* write complex conjugate output */
+ *pOut2-- = -outI;
+ *pOut2-- = outR;
+
+ /* update coefficient pointer */
+ pCoefB = pCoefB + (2 * modifier);
+ pCoefA = pCoefA + (2 * modifier - 1);
+
+ /* Decrement loop count */
+ i--;
+ }
+
+ pDst[2 * fftLen] = (pSrc[0] - pSrc[1]) >> 1U;
+ pDst[2 * fftLen + 1] = 0;
+
+ pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
+ pDst[1] = 0;
+}
+
+
+/**
+ @brief Core Real IFFT process
+ @param[in] pSrc points to input buffer
+ @param[in] fftLen length of FFT
+ @param[in] pATable points to twiddle Coef A buffer
+ @param[in] pBTable points to twiddle Coef B buffer
+ @param[out] pDst points to output buffer
+ @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+ @return none
+ */
+
+void arm_split_rifft_q31(
+ q31_t * pSrc,
+ uint32_t fftLen,
+ const q31_t * pATable,
+ const q31_t * pBTable,
+ q31_t * pDst,
+ uint32_t modifier)
+{
+ q31_t outR, outI; /* Temporary variables for output */
+ const q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
+ q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
+ q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[2 * fftLen + 1];
+
+ pCoefA = &pATable[0];
+ pCoefB = &pBTable[0];
+
+ while (fftLen > 0U)
+ {
+ /*
+ outR = ( pIn[2 * i] * pATable[2 * i]
+ + pIn[2 * i + 1] * pATable[2 * i + 1]
+ + pIn[2 * n - 2 * i] * pBTable[2 * i]
+ - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+
+ outI = ( pIn[2 * i + 1] * pATable[2 * i]
+ - pIn[2 * i] * pATable[2 * i + 1]
+ - pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
+ - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+ */
+
+ CoefA1 = *pCoefA++;
+ CoefA2 = *pCoefA;
+
+ /* outR = (pIn[2 * i] * pATable[2 * i] */
+ mult_32x32_keep32_R (outR, *pIn1, CoefA1);
+
+ /* - pIn[2 * i] * pATable[2 * i + 1] */
+ mult_32x32_keep32_R (outI, *pIn1++, -CoefA2);
+
+ /* pIn[2 * i + 1] * pATable[2 * i + 1] */
+ multAcc_32x32_keep32_R (outR, *pIn1, CoefA2);
+
+ /* pIn[2 * i + 1] * pATable[2 * i] */
+ multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1);
+
+ /* pIn[2 * n - 2 * i] * pBTable[2 * i] */
+ multAcc_32x32_keep32_R (outR, *pIn2, CoefA2);
+ CoefB1 = *pCoefB;
+
+ /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
+ multSub_32x32_keep32_R (outI, *pIn2--, CoefB1);
+
+ /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
+ multAcc_32x32_keep32_R (outR, *pIn2, CoefB1);
+
+ /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+ multAcc_32x32_keep32_R (outI, *pIn2--, CoefA2);
+
+ /* write output */
+ *pDst++ = outR;
+ *pDst++ = outI;
+
+ /* update coefficient pointer */
+ pCoefB = pCoefB + (modifier * 2);
+ pCoefA = pCoefA + (modifier * 2 - 1);
+
+ /* Decrement loop count */
+ fftLen--;
+ }
+
+}
|