From c723f59ecc27505566d54d9edde0bdb98a5623b1 Mon Sep 17 00:00:00 2001 From: Clyne Sullivan Date: Thu, 30 Jan 2025 08:14:20 -0500 Subject: [PATCH] decibel measuring? --- Core/Src/main.c | 65 +- Drivers/qfplib-m0-full-20240105/LICENCE | 339 ++++++ Drivers/qfplib-m0-full-20240105/README | 4 + .../qfplib-m0-full-20240105/qfplib-m0-full.h | 44 + .../qfplib-m0-full-20240105/qfplib-m0-full.s | 1039 +++++++++++++++++ Drivers/qfplib-m0-full-20240105/qfplib-port.h | 382 ++++++ Makefile | 8 +- 7 files changed, 1866 insertions(+), 15 deletions(-) create mode 100644 Drivers/qfplib-m0-full-20240105/LICENCE create mode 100644 Drivers/qfplib-m0-full-20240105/README create mode 100644 Drivers/qfplib-m0-full-20240105/qfplib-m0-full.h create mode 100644 Drivers/qfplib-m0-full-20240105/qfplib-m0-full.s create mode 100644 Drivers/qfplib-m0-full-20240105/qfplib-port.h diff --git a/Core/Src/main.c b/Core/Src/main.c index aa520a3..6b7fad0 100644 --- a/Core/Src/main.c +++ b/Core/Src/main.c @@ -22,18 +22,23 @@ /* Private includes ----------------------------------------------------------*/ /* USER CODE BEGIN Includes */ #include +#include /* USER CODE END Includes */ /* Private typedef -----------------------------------------------------------*/ /* USER CODE BEGIN PTD */ -typedef struct { +typedef struct { int32_t value : 18; } sample_t; /* USER CODE END PTD */ /* Private define ------------------------------------------------------------*/ /* USER CODE BEGIN PD */ - +#define SAMPLE_COUNT (512) // Sample count per half-transfer +#define MIC_OFFSET_DB ( 0.f) // Linear offset +#define MIC_SENSITIVITY (-26.f) // dBFS value expected at MIC_REF_DB +#define MIC_REF_DB ( 94.f) // dB where sensitivity is specified +#define MIC_BITS (18) /* USER CODE END PD */ /* Private macro -------------------------------------------------------------*/ @@ -50,10 +55,16 @@ DMA_HandleTypeDef hdma_spi1_tx; UART_HandleTypeDef huart2; /* USER CODE BEGIN PV */ -static /*const*/ uint8_t I2S_Frame_Buffer[8] = { +static const uint8_t I2S_Frame_Buffer[8] = { 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF }; -static uint8_t I2S_Receive_Buffer[4096]; +static uint8_t I2S_Receive_Buffer[SAMPLE_COUNT * sizeof(sample_t)]; + +float ln10; +float MIC_REF_AMPL; + +static float DB_Sum_Squares = 0.f; +static unsigned DB_Count = 0; /* USER CODE END PV */ /* Private function prototypes -----------------------------------------------*/ @@ -65,7 +76,7 @@ static void MX_USART2_UART_Init(void); /* USER CODE BEGIN PFP */ static HAL_StatusTypeDef HAL_SPI_TransmitReceive_DMA_Mixed( SPI_HandleTypeDef *hspi, - uint8_t *pTxData, + const uint8_t *pTxData, uint8_t *pRxData, uint16_t TxSize, uint16_t RxSize); @@ -91,7 +102,9 @@ int main(void) { /* USER CODE BEGIN 1 */ - + ln10 = qfp_fln(10.f); + MIC_REF_AMPL = qfp_fmul(qfp_int2float((1 << (MIC_BITS - 1)) - 1), + qfp_fpow(10.f, MIC_SENSITIVITY / 20.f)); /* USER CODE END 1 */ /* MCU Configuration--------------------------------------------------------*/ @@ -426,7 +439,7 @@ static void MX_GPIO_Init(void) /* USER CODE BEGIN 4 */ HAL_StatusTypeDef HAL_SPI_TransmitReceive_DMA_Mixed( SPI_HandleTypeDef *hspi, - uint8_t *pTxData, + const uint8_t *pTxData, uint8_t *pRxData, uint16_t TxSize, uint16_t RxSize) @@ -507,13 +520,44 @@ void SPI_DMATransmitReceiveCplt(DMA_HandleTypeDef *hdma) (void)hspi; } +static float process(float in_div4) +{ + static float z1 = 0, z2 = 0, z3 = 0, z5 = 0; + + float out1 = qfp_fadd(in_div4, z1); + z1 = qfp_fadd(qfp_fmul(1.062f, out1), z2); + z2 = qfp_fsub(qfp_fmul(-0.14f, out1), in_div4); + + float out2 = qfp_fadd(out1, z3); + z3 = out1; + + float out3 = qfp_fadd(out2, z5); + z5 = qfp_fsub(qfp_fmul(0.985f, out3), out2); + + return out3; +} + void SPI_DMAHalfTransmitReceiveCplt(DMA_HandleTypeDef *hdma) { SPI_HandleTypeDef *hspi = (SPI_HandleTypeDef *)hdma->Parent; (void)hspi; - sample_t sample = *(sample_t *)I2S_Receive_Buffer; - printf("%d\r\n", sample.value); + sample_t *sample = (sample_t *)I2S_Receive_Buffer; + for (int i = 0; i < SAMPLE_COUNT / 8; i++) { + float f = process(qfp_int2float(sample[i].value / 4)); + DB_Sum_Squares = qfp_fadd(DB_Sum_Squares, qfp_fmul(f, f)); + } + DB_Count += SAMPLE_COUNT / 8; + + if (DB_Count > 48000 / 4) { + float rms = qfp_fsqrt(qfp_fdiv(DB_Sum_Squares, qfp_uint2float(DB_Count))); + float db = qfp_fadd(MIC_OFFSET_DB + MIC_REF_DB, qfp_fmul(20.f, + qfp_flog10(qfp_fdiv(rms, MIC_REF_AMPL)))); + DB_Sum_Squares = 0.f; + DB_Count = 0; + + printf("%d\r\n", qfp_float2int(db)); + } } /* USER CODE END 4 */ @@ -524,7 +568,6 @@ void SPI_DMAHalfTransmitReceiveCplt(DMA_HandleTypeDef *hdma) void Error_Handler(void) { /* USER CODE BEGIN Error_Handler_Debug */ - /* User can add his own implementation to report the HAL error return state */ __disable_irq(); printf("Unhandled error, halting!\r\n"); while (1) @@ -544,8 +587,6 @@ void Error_Handler(void) void assert_failed(uint8_t *file, uint32_t line) { /* USER CODE BEGIN 6 */ - /* User can add his own implementation to report the file name and line number, - ex: printf("Wrong parameters value: file %s on line %d\r\n", file, line) */ printf("Wrong parameters value: file %s on line %d\r\n", file, line); /* USER CODE END 6 */ } diff --git a/Drivers/qfplib-m0-full-20240105/LICENCE b/Drivers/qfplib-m0-full-20240105/LICENCE new file mode 100644 index 0000000..d511905 --- /dev/null +++ b/Drivers/qfplib-m0-full-20240105/LICENCE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/Drivers/qfplib-m0-full-20240105/README b/Drivers/qfplib-m0-full-20240105/README new file mode 100644 index 0000000..99e878a --- /dev/null +++ b/Drivers/qfplib-m0-full-20240105/README @@ -0,0 +1,4 @@ +The Qfplib libraries are open source, licensed under version 2 of the +GNU GPL. A copy of that licence is included in this archive. + +Visit http://www.quinapalus.com/qfplib.html for more information. diff --git a/Drivers/qfplib-m0-full-20240105/qfplib-m0-full.h b/Drivers/qfplib-m0-full-20240105/qfplib-m0-full.h new file mode 100644 index 0000000..0b1f6eb --- /dev/null +++ b/Drivers/qfplib-m0-full-20240105/qfplib-m0-full.h @@ -0,0 +1,44 @@ +#ifndef __QFPLIB_M0_FULL_H__ +#define __QFPLIB_M0_FULL_H__ + +/* +Copyright 2019-2024 Mark Owen +http://www.quinapalus.com +E-mail: qfp@quinapalus.com + +This file is free software: you can redistribute it and/or modify +it under the terms of version 2 of the GNU General Public License +as published by the Free Software Foundation. + +This file is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this file. If not, see or +write to the Free Software Foundation, Inc., 51 Franklin Street, +Fifth Floor, Boston, MA 02110-1301, USA. +*/ + +typedef unsigned int ui32; +typedef int i32; + +extern float qfp_fadd (float x,float y); +extern float qfp_fsub (float x,float y); +extern float qfp_fmul (float x,float y); +extern float qfp_fdiv (float x,float y); +extern int qfp_fcmp (float x,float y); +extern float qfp_fsqrt (float x); +extern i32 qfp_float2int (float x); +extern i32 qfp_float2fix (float x,int f); +extern ui32 qfp_float2uint (float x); +extern ui32 qfp_float2ufix (float x,int f); +extern float qfp_int2float (i32 x); +extern float qfp_fix2float (i32 x,int f); +extern float qfp_uint2float (ui32 x); +extern float qfp_ufix2float (ui32 x,int f); +extern float qfp_fexp (float x); +extern float qfp_fln (float x); + +#endif diff --git a/Drivers/qfplib-m0-full-20240105/qfplib-m0-full.s b/Drivers/qfplib-m0-full-20240105/qfplib-m0-full.s new file mode 100644 index 0000000..2ded28d --- /dev/null +++ b/Drivers/qfplib-m0-full-20240105/qfplib-m0-full.s @@ -0,0 +1,1039 @@ +@ Copyright 2019-2024 Mark Owen +@ http://www.quinapalus.com +@ E-mail: qfp@quinapalus.com +@ +@ This file is free software: you can redistribute it and/or modify +@ it under the terms of version 2 of the GNU General Public License +@ as published by the Free Software Foundation. +@ +@ This file is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +@ GNU General Public License for more details. +@ +@ You should have received a copy of the GNU General Public License +@ along with this file. If not, see or +@ write to the Free Software Foundation, Inc., 51 Franklin Street, +@ Fifth Floor, Boston, MA 02110-1301, USA. + +.syntax unified +.cpu cortex-m0plus +.thumb + +@ exported symbols + +.global qfp_fadd +.global qfp_fsub +.global qfp_fmul +.global qfp_fdiv +.global qfp_fcmp +.global qfp_fsqrt +.global qfp_float2int +.global qfp_float2fix +.global qfp_float2uint +.global qfp_float2ufix +.global qfp_int2float +.global qfp_fix2float +.global qfp_uint2float +.global qfp_ufix2float +.global qfp_fexp +.global qfp_fln + +qfp_lib_start: + +@ exchange r0<->r1, r2<->r3 +xchxy: + push {r0,r2,r14} + mov r0,r1 + mov r2,r3 + pop {r1,r3,r15} + +@ IEEE single in r0-> signed (two's complemennt) mantissa in r0 9Q23 (24 significant bits), signed exponent (bias removed) in r2 +@ trashes r4; zero, denormal -> mantissa=+/-1, exponent=-380; Inf, NaN -> mantissa=+/-1, exponent=+640 +unpackx: + lsrs r2,r0,#23 @ save exponent and sign + lsls r0,#9 @ extract mantissa + lsrs r0,#9 + movs r4,#1 + lsls r4,#23 + orrs r0,r4 @ reinstate implied leading 1 + cmp r2,#255 @ test sign bit + uxtb r2,r2 @ clear it + bls 1f @ branch on positive + rsbs r0,#0 @ negate mantissa +1: + subs r2,#1 + cmp r2,#254 @ zero/denormal/Inf/NaN? + bhs 2f + subs r2,#126 @ remove exponent bias: can now be -126..+127 + bx r14 + +2: @ here with special-case values + cmp r0,#0 + mov r0,r4 @ set mantissa to +1 + bpl 3f + rsbs r0,#0 @ zero/denormal/Inf/NaN: mantissa=+/-1 +3: + subs r2,#126 @ zero/denormal: exponent -> -127; Inf, NaN: exponent -> 128 + lsls r2,#2 @ zero/denormal: exponent -> -508; Inf, NaN: exponent -> 512 + adds r2,#128 @ zero/denormal: exponent -> -380; Inf, NaN: exponent -> 640 + bx r14 + +@ normalise and pack signed mantissa in r0 nominally 3Q29, signed exponent in r2-> IEEE single in r0 +@ trashes r4, preserves r1,r3 +@ r5: "sticky bits", must be zero iff all result bits below r0 are zero for correct rounding +packx: + lsrs r4,r0,#31 @ save sign bit + lsls r4,r4,#31 @ sign now in b31 + bpl 2f @ skip if positive + cmp r5,#0 + beq 11f + adds r0,#1 @ fiddle carry in to following rsb if sticky bits are non-zero +11: + rsbs r0,#0 @ can now treat r0 as unsigned +packx0: + bmi 3f @ catch r0=0x80000000 case +2: + subs r2,#1 @ normalisation loop + adds r0,r0 + beq 1f @ zero? special case + bpl 2b @ normalise so leading "1" in bit 31 +3: + adds r2,#129 @ (mis-)offset exponent + bne 12f @ special case: highest denormal can round to lowest normal + adds r0,#0x80 @ in special case, need to add 256 to r0 for rounding + bcs 4f @ tripped carry? then have leading 1 in C as required +12: + adds r0,#0x80 @ rounding + bcs 4f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits) + cmp r5,#0 + beq 7f @ sticky bits zero? +8: + lsls r0,#1 @ remove leading 1 +9: + subs r2,#1 @ compensate exponent on this path +4: + cmp r2,#254 + bge 5f @ overflow? + adds r2,#1 @ correct exponent offset + ble 10f @ denormal/underflow? + lsrs r0,#9 @ align mantissa + lsls r2,#23 @ align exponent + orrs r0,r2 @ assemble exponent and mantissa +6: + orrs r0,r4 @ apply sign +1: + bx r14 + +5: + movs r0,#0xff @ create infinity + lsls r0,#23 + b 6b + +10: + movs r0,#0 @ create zero + bx r14 + +7: @ sticky bit rounding case + lsls r5,r0,#24 @ check bottom 8 bits of r0 + bne 8b @ in rounding-tie case? + lsrs r0,#9 @ ensure even result + lsls r0,#10 + b 9b + +.align 2 +.ltorg + +@ signed multiply r0 1Q23 by r1 4Q23, result in r0 7Q25, sticky bits in r5 +@ trashes r3,r4 +mul0: + uxth r3,r0 @ Q23 + asrs r4,r1,#16 @ Q7 + muls r3,r4 @ L*H, Q30 signed + asrs r4,r0,#16 @ Q7 + uxth r5,r1 @ Q23 + muls r4,r5 @ H*L, Q30 signed + adds r3,r4 @ sum of middle partial products + uxth r4,r0 + muls r4,r5 @ L*L, Q46 unsigned + lsls r5,r4,#16 @ initialise sticky bits from low half of low partial product + lsrs r4,#16 @ Q25 + adds r3,r4 @ add high half of low partial product to sum of middle partial products + @ (cannot generate carry by limits on input arguments) + asrs r0,#16 @ Q7 + asrs r1,#16 @ Q7 + muls r0,r1 @ H*H, Q14 signed + lsls r0,#11 @ high partial product Q25 + lsls r1,r3,#27 @ sticky + orrs r5,r1 @ collect further sticky bits + asrs r1,r3,#5 @ middle partial products Q25 + adds r0,r1 @ final result + bx r14 + +.thumb_func +qfp_fcmp: + lsls r2,r0,#1 + lsrs r2,#24 + beq 1f + cmp r2,#0xff + bne 2f +1: + lsrs r0,#23 @ clear mantissa if NaN or denormal + lsls r0,#23 +2: + lsls r2,r1,#1 + lsrs r2,#24 + beq 1f + cmp r2,#0xff + bne 2f +1: + lsrs r1,#23 @ clear mantissa if NaN or denormal + lsls r1,#23 +2: + movs r2,#1 @ initialise result + eors r1,r0 + bmi 4f @ opposite signs? then can proceed on basis of sign of x + eors r1,r0 @ restore y + bpl 1f + rsbs r2,#0 @ both negative? flip comparison +1: + cmp r0,r1 + bgt 2f + blt 3f +5: + movs r2,#0 +3: + rsbs r2,#0 +2: + subs r0,r2,#0 + bx r14 +4: + orrs r1,r0 + adds r1,r1 + beq 5b + cmp r0,#0 + bge 2b + b 3b + +.ltorg + +@ convert float to signed int, rounding towards -Inf, clamping +.thumb_func +qfp_float2int: + movs r1,#0 @ fall through + +@ convert float in r0 to signed fixed point in r0, clamping +.thumb_func +qfp_float2fix: + push {r4,r14} + bl unpackx + movs r3,r2 + adds r3,#130 + bmi 6f @ -0? + add r2,r1 @ incorporate binary point position into exponent + subs r2,#23 @ r2 is now amount of left shift required + blt 1f @ requires right shift? + cmp r2,#7 @ overflow? + ble 4f +3: @ overflow + asrs r1,r0,#31 @ +ve:0 -ve:0xffffffff + mvns r1,r1 @ +ve:0xffffffff -ve:0 + movs r0,#1 + lsls r0,#31 +5: + eors r0,r1 @ +ve:0x7fffffff -ve:0x80000000 (unsigned path: 0xffffffff) + pop {r4,r15} +1: + rsbs r2,#0 @ right shift for r0, >0 + cmp r2,#32 + blt 2f @ more than 32 bits of right shift? + movs r2,#32 +2: + asrs r0,r0,r2 + pop {r4,r15} +6: + movs r0,#0 + pop {r4,r15} + +@ unsigned version +.thumb_func +qfp_float2uint: + movs r1,#0 @ fall through +.thumb_func +qfp_float2ufix: + push {r4,r14} + bl unpackx + add r2,r1 @ incorporate binary point position into exponent + movs r1,r0 + bmi 5b @ negative? return zero + subs r2,#23 @ r2 is now amount of left shift required + blt 1b @ requires right shift? + mvns r1,r0 @ ready to return 0xffffffff + cmp r2,#8 @ overflow? + bgt 5b +4: + lsls r0,r0,r2 @ result fits, left shifted + pop {r4,r15} + +@ convert signed int to float, rounding +.thumb_func +qfp_int2float: + movs r1,#0 @ fall through + +@ convert signed fix to float, rounding; number of r0 bits after point in r1 +.thumb_func +qfp_fix2float: + push {r4,r5,r14} +1: + movs r2,#29 + subs r2,r1 @ fix exponent +packretns: @ pack and return, sticky bits=0 + movs r5,#0 +packret: @ common return point: "pack and return" + bl packx +ret_pop45: + pop {r4,r5,r15} + + +@ unsigned version +.thumb_func +qfp_uint2float: + movs r1,#0 @ fall through +.thumb_func +qfp_ufix2float: + push {r4,r5,r14} + cmp r0,#0 + bge 1b @ treat <2^31 as signed + movs r2,#30 + subs r2,r1 @ fix exponent + lsls r5,r0,#31 @ one sticky bit + lsrs r0,#1 + b packret + +.thumb_func +qfp_fexp: + push {r4,r5,r14} + movs r1,#24 + bl qfp_float2fix @ Q24: covers entire valid input range + asrs r1,r0,#16 @ Q8 + ldr r2,=#5909 @ log_2(e) Q12 + muls r2,r1 @ estimate exponent of result Q20 (always an underestimate) + asrs r2,#20 @ Q0 + lsls r1,r0,#6 @ Q30 + ldr r0,=#0x2c5c85fe @ ln(2) Q30 + muls r0,r2 @ accurate contribution of estimated exponent + subs r1,r0 @ residual to be exponentiated, guaranteed ≥0, < about 0.75 Q30 + +@ here +@ r1: mantissa to exponentiate, 0...~0.75 Q30 +@ r2: first exponent estimate + + movs r5,#1 @ shift + adr r3,ftab_exp @ could use alternate words from dtab_exp to save space if required + movs r0,#1 + lsls r0,#29 @ x=1 Q29 + +3: + ldmia r3!,{r4} + subs r4,r1,r4 + bmi 1f + movs r1,r4 @ keep result of subtraction + movs r4,r0 + lsrs r4,r5 + adcs r0,r4 @ x+=x>>i with rounding + +1: + adds r5,#1 + cmp r5,#15 + bne 3b + +@ here +@ r0: exp a Q29 1..2+ +@ r1: ε (residual x where x=a+ε), < 2^-14 Q30 +@ r2: first exponent estimate +@ and we wish to calculate exp x=exp a exp ε~(exp a)(1+ε) + + lsrs r3,r0,#15 @ exp a Q14 + muls r3,r1 @ ε exp a Q44 + lsrs r3,#15 @ ε exp a Q29 + adcs r0,r3 @ (1+ε) exp a Q29 with rounding + + b packretns @ pack result + +.thumb_func +qfp_fln: + push {r4,r5,r14} + asrs r1,r0,#23 + bmi 3f @ -ve argument? + beq 3f @ 0 argument? + cmp r1,#0xff + beq 4f @ +Inf/NaN + bl unpackx + adds r2,#1 + ldr r3,=#0x2c5c85fe @ ln(2) Q30 + lsrs r1,r3,#14 @ ln(2) Q16 + muls r1,r2 @ result estimate Q16 + asrs r1,#16 @ integer contribution to result + muls r3,r2 + lsls r4,r1,#30 + subs r3,r4 @ fractional contribution to result Q30, signed + lsls r0,#8 @ Q31 + +@ here +@ r0: mantissa Q31 +@ r1: integer contribution to result +@ r3: fractional contribution to result Q30, signed + + movs r5,#1 @ shift + adr r4,ftab_exp @ could use alternate words from dtab_exp to save space if required + +2: + movs r2,r0 + lsrs r2,r5 + adcs r2,r0 @ x+(x>>i) with rounding + bcs 1f @ >=2? + movs r0,r2 @ keep result + ldr r2,[r4] + subs r3,r2 +1: + adds r4,#4 + adds r5,#1 + cmp r5,#15 + bne 2b + +@ here +@ r0: residual x, nearly 2 Q31 +@ r1: integer contribution to result +@ r3: fractional part of result Q30 + + asrs r0,#2 + adds r0,r3,r0 + + cmp r1,#0 + bne 2f + + asrs r0,#1 + lsls r1,#29 + adds r0,r1 + movs r2,#0 + b packretns + +2: + lsls r1,#24 + asrs r0,#6 @ Q24 + adcs r0,r1 @ with rounding + movs r2,#5 + b packretns + +3: + ldr r0,=#0xff800000 @ -Inf + pop {r4,r5,r15} +4: + ldr r0,=#0x7f800000 @ +Inf + pop {r4,r5,r15} + +.align 2 +ftab_exp: +.word 0x19f323ed @ log 1+2^-1 Q30 +.word 0x0e47fbe4 @ log 1+2^-2 Q30 +.word 0x0789c1dc @ log 1+2^-3 Q30 +.word 0x03e14618 @ log 1+2^-4 Q30 +.word 0x01f829b1 @ log 1+2^-5 Q30 +.word 0x00fe0546 @ log 1+2^-6 Q30 +.word 0x007f80aa @ log 1+2^-7 Q30 +.word 0x003fe015 @ log 1+2^-8 Q30 +.word 0x001ff803 @ log 1+2^-9 Q30 +.word 0x000ffe00 @ log 1+2^-10 Q30 +.word 0x0007ff80 @ log 1+2^-11 Q30 +.word 0x0003ffe0 @ log 1+2^-12 Q30 +.word 0x0001fff8 @ log 1+2^-13 Q30 +.word 0x0000fffe @ log 1+2^-14 Q30 + +.align 2 +.thumb_func +qfp_fsub: + ldr r2,=#0x80000000 + eors r1,r2 @ flip sign on second argument +@ drop into fadd, on .align2:ed boundary + +.thumb_func +qfp_fadd: + push {r4,r5,r6,r14} + asrs r4,r0,#31 + lsls r2,r0,#1 + lsrs r2,#24 @ x exponent + beq fa_xe0 + cmp r2,#255 + beq fa_xe255 +fa_xe: + asrs r5,r1,#31 + lsls r3,r1,#1 + lsrs r3,#24 @ y exponent + beq fa_ye0 + cmp r3,#255 + beq fa_ye255 +fa_ye: + ldr r6,=#0x007fffff + ands r0,r0,r6 @ extract mantissa bits + ands r1,r1,r6 + adds r6,#1 @ r6=0x00800000 + orrs r0,r0,r6 @ set implied 1 + orrs r1,r1,r6 + eors r0,r0,r4 @ complement... + eors r1,r1,r5 + subs r0,r0,r4 @ ... and add 1 if sign bit is set: 2's complement + subs r1,r1,r5 + subs r5,r3,r2 @ ye-xe + subs r4,r2,r3 @ xe-ye + bmi fa_ygtx +@ here xe>=ye + cmp r4,#30 + bge fa_xmgty @ xe much greater than ye? + adds r5,#32 + movs r3,r2 @ save exponent +@ here y in r1 must be shifted down r4 places to align with x in r0 + movs r2,r1 + lsls r2,r2,r5 @ keep the bits we will shift off the bottom of r1 + asrs r1,r1,r4 + b fa_0 + +.ltorg + +fa_ymgtx: + movs r2,#0 @ result is just y + movs r0,r1 + b fa_1 +fa_xmgty: + movs r3,r2 @ result is just x + movs r2,#0 + b fa_1 + +fa_ygtx: +@ here ye>xe + cmp r5,#30 + bge fa_ymgtx @ ye much greater than xe? + adds r4,#32 +@ here x in r0 must be shifted down r5 places to align with y in r1 + movs r2,r0 + lsls r2,r2,r4 @ keep the bits we will shift off the bottom of r0 + asrs r0,r0,r5 +fa_0: + adds r0,r1 @ result is now in r0:r2, possibly highly denormalised or zero; exponent in r3 + beq fa_9 @ if zero, inputs must have been of identical magnitude and opposite sign, so return +0 +fa_1: + lsrs r1,r0,#31 @ sign bit + beq fa_8 + mvns r0,r0 + rsbs r2,r2,#0 + bne fa_8 + adds r0,#1 +fa_8: + adds r6,r6 +@ r6=0x01000000 + cmp r0,r6 + bhs fa_2 +fa_3: + adds r2,r2 @ normalisation loop + adcs r0,r0 + subs r3,#1 @ adjust exponent + cmp r0,r6 + blo fa_3 +fa_2: +@ here r0:r2 is the result mantissa 0x01000000<=r0<0x02000000, r3 the exponent, and r1 the sign bit + lsrs r0,#1 + bcc fa_4 +@ rounding bits here are 1:r2 + adds r0,#1 @ round up + cmp r2,#0 + beq fa_5 @ sticky bits all zero? +fa_4: + cmp r3,#254 + bhs fa_6 @ exponent too large or negative? + lsls r1,#31 @ pack everything + add r0,r1 + lsls r3,#23 + add r0,r3 +fa_end: + pop {r4,r5,r6,r15} + +fa_9: + cmp r2,#0 @ result zero? + beq fa_end @ return +0 + b fa_1 + +fa_5: + lsrs r0,#1 + lsls r0,#1 @ round to even + b fa_4 + +fa_6: + bge fa_7 +@ underflow +@ can handle denormals here + lsls r0,r1,#31 @ result is signed zero + pop {r4,r5,r6,r15} +fa_7: +@ overflow + lsls r0,r1,#8 + adds r0,#255 + lsls r0,#23 @ result is signed infinity + pop {r4,r5,r6,r15} + + +fa_xe0: +@ can handle denormals here + subs r2,#32 + adds r2,r4 @ exponent -32 for +Inf, -33 for -Inf + b fa_xe + +fa_xe255: +@ can handle NaNs here + lsls r2,#8 + add r2,r2,r4 @ exponent ~64k for +Inf, ~64k-1 for -Inf + b fa_xe + +fa_ye0: +@ can handle denormals here + subs r3,#32 + adds r3,r5 @ exponent -32 for +Inf, -33 for -Inf + b fa_ye + +fa_ye255: +@ can handle NaNs here + lsls r3,#8 + add r3,r3,r5 @ exponent ~64k for +Inf, ~64k-1 for -Inf + b fa_ye + + +.align 2 +.thumb_func +qfp_fmul: + push {r7,r14} + mov r2,r0 + eors r2,r1 @ sign of result + lsrs r2,#31 + lsls r2,#31 + mov r14,r2 + lsls r0,#1 + lsls r1,#1 + lsrs r2,r0,#24 @ xe + beq fm_xe0 + cmp r2,#255 + beq fm_xe255 +fm_xe: + lsrs r3,r1,#24 @ ye + beq fm_ye0 + cmp r3,#255 + beq fm_ye255 +fm_ye: + adds r7,r2,r3 @ exponent of result (will possibly be incremented) + subs r7,#128 @ adjust bias for packing + lsls r0,#8 @ x mantissa + lsls r1,#8 @ y mantissa + lsrs r0,#9 + lsrs r1,#9 + + adds r2,r0,r1 @ for later + mov r12,r2 + lsrs r2,r0,#7 @ x[22..7] Q16 + lsrs r3,r1,#7 @ y[22..7] Q16 + muls r2,r2,r3 @ result [45..14] Q32: never an overestimate and worst case error is 2*(2^7-1)*(2^23-2^7)+(2^7-1)^2 = 2130690049 < 2^31 + muls r0,r0,r1 @ result [31..0] Q46 + lsrs r2,#18 @ result [45..32] Q14 + bcc 1f + cmp r0,#0 + bmi 1f + adds r2,#1 @ fix error in r2 +1: + lsls r3,r0,#9 @ bits off bottom of result + lsrs r0,#23 @ Q23 + lsls r2,#9 + adds r0,r2 @ cut'n'shut + add r0,r12 @ implied 1*(x+y) to compensate for no insertion of implied 1s +@ result-1 in r3:r0 Q23+32, i.e., in range [0,3) + + lsrs r1,r0,#23 + bne fm_0 @ branch if we need to shift down one place +@ here 1<=result<2 + cmp r7,#254 + bhs fm_3a @ catches both underflow and overflow + lsls r3,#1 @ sticky bits at top of R3, rounding bit in carry + bcc fm_1 @ no rounding + beq fm_2 @ rounding tie? + adds r0,#1 @ round up +fm_1: + adds r7,#1 @ for implied 1 + lsls r7,#23 @ pack result + add r0,r7 + add r0,r14 + pop {r7,r15} +fm_2: @ rounding tie + adds r0,#1 +fm_3: + lsrs r0,#1 + lsls r0,#1 @ clear bottom bit + b fm_1 + +@ here 1<=result-1<3 +fm_0: + adds r7,#1 @ increment exponent + cmp r7,#254 + bhs fm_3b @ catches both underflow and overflow + lsrs r0,#1 @ shift mantissa down + bcc fm_1a @ no rounding + adds r0,#1 @ assume we will round up + cmp r3,#0 @ sticky bits + beq fm_3c @ rounding tie? +fm_1a: + adds r7,r7 + adds r7,#1 @ for implied 1 + lsls r7,#22 @ pack result + add r0,r7 + add r0,r14 + pop {r7,r15} + +fm_3c: + lsrs r0,#1 + lsls r0,#1 @ clear bottom bit + b fm_1a + +fm_xe0: + subs r2,#16 +fm_xe255: + lsls r2,#8 + b fm_xe +fm_ye0: + subs r3,#16 +fm_ye255: + lsls r3,#8 + b fm_ye + +@ here the result is under- or overflowing +fm_3b: + bge fm_4 @ branch on overflow +@ trap case where result is denormal 0x007fffff + 0.5ulp or more + adds r7,#1 @ exponent=-1? + bne fm_5 +@ corrected mantissa will be >= 3.FFFFFC (0x1fffffe Q23) +@ so r0 >= 2.FFFFFC (0x17ffffe Q23) + adds r0,#2 + lsrs r0,#23 + cmp r0,#3 + bne fm_5 + b fm_6 + +fm_3a: + bge fm_4 @ branch on overflow +@ trap case where result is denormal 0x007fffff + 0.5ulp or more + adds r7,#1 @ exponent=-1? + bne fm_5 + adds r0,#1 @ mantissa=0xffffff (i.e., r0=0x7fffff)? + lsrs r0,#23 + beq fm_5 +fm_6: + movs r0,#1 @ return smallest normal + lsls r0,#23 + add r0,r14 + pop {r7,r15} + +fm_5: + mov r0,r14 + pop {r7,r15} +fm_4: + movs r0,#0xff + lsls r0,#23 + add r0,r14 + pop {r7,r15} + +@ This version of the division algorithm uses external divider hardware to estimate the +@ reciprocal of the divisor to about 14 bits; then a multiplication step to get a first +@ quotient estimate; then the remainder based on this estimate is used to calculate a +@ correction to the quotient. The result is good to about 27 bits and so we only need +@ to calculate the exact remainder when close to a rounding boundary. +.align 2 +.thumb_func +qfp_fdiv: + push {r4,r5,r6,r14} +fdiv_n: + + movs r4,#1 + lsls r4,#23 @ implied 1 position + lsls r2,r1,#9 @ clear out sign and exponent + lsrs r2,r2,#9 + orrs r2,r2,r4 @ divisor mantissa Q23 with implied 1 + +@ here +@ r0=packed dividend +@ r1=packed divisor +@ r2=divisor mantissa Q23 +@ r4=1<<23 + +// see divtest.c + lsrs r3,r2,#18 @ x2=x>>18; // Q5 32..63 + adr r5,rcpapp-32 + ldrb r3,[r5,r3] @ u=lut5[x2-32]; // Q8 + lsls r5,r2,#5 + muls r5,r5,r3 + asrs r5,#14 @ e=(i32)(u*(x<<5))>>14; // Q22 + asrs r6,r5,#11 + muls r6,r6,r6 @ e2=(e>>11)*(e>>11); // Q22 + subs r5,r6 + muls r5,r5,r3 @ c=(e-e2)*u; // Q30 + lsls r6,r3,#8 + asrs r5,#13 + adds r5,#1 + asrs r5,#1 + subs r5,r6,r5 @ u0=(u<<8)-((c+0x2000)>>14); // Q16 + +@ here +@ r0=packed dividend +@ r1=packed divisor +@ r2=divisor mantissa Q23 +@ r4=1<<23 +@ r5=reciprocal estimate Q16 + + lsrs r6,r0,#23 + uxtb r3,r6 @ dividend exponent + lsls r0,#9 + lsrs r0,#9 + orrs r0,r0,r4 @ dividend mantissa Q23 + + lsrs r1,#23 + eors r6,r1 @ sign of result in bit 8 + lsrs r6,#8 + lsls r6,#31 @ sign of result in bit 31, other bits clear + +@ here +@ r0=dividend mantissa Q23 +@ r1=divisor sign+exponent +@ r2=divisor mantissa Q23 +@ r3=dividend exponent +@ r5=reciprocal estimate Q16 +@ r6b31=sign of result + + uxtb r1,r1 @ divisor exponent + cmp r1,#0 + beq retinf + cmp r1,#255 + beq 20f @ divisor is infinite + cmp r3,#0 + beq retzero + cmp r3,#255 + beq retinf + subs r3,r1 @ initial result exponent (no bias) + adds r3,#125 @ add bias + + lsrs r1,r0,#8 @ dividend mantissa Q15 + +@ here +@ r0=dividend mantissa Q23 +@ r1=dividend mantissa Q15 +@ r2=divisor mantissa Q23 +@ r3=initial result exponent +@ r5=reciprocal estimate Q16 +@ r6b31=sign of result + + muls r1,r5 + + lsrs r1,#16 @ Q15 qu0=(q15)(u*y0); + lsls r0,r0,#15 @ dividend Q38 + movs r4,r2 + muls r4,r1 @ Q38 qu0*x + subs r4,r0,r4 @ Q38 re0=(y<<15)-qu0*x; note this remainder is signed + asrs r4,#10 + muls r4,r5 @ Q44 qu1=(re0>>10)*u; this quotient correction is also signed + asrs r4,#16 @ Q28 + lsls r1,#13 + adds r1,r1,r4 @ Q28 qu=(qu0<<13)+(qu1>>16); + +@ here +@ r0=dividend mantissa Q38 +@ r1=quotient Q28 +@ r2=divisor mantissa Q23 +@ r3=initial result exponent +@ r6b31=sign of result + + lsrs r4,r1,#28 + bne 1f +@ here the quotient is less than 1<<28 (i.e., result mantissa <1.0) + + adds r1,#5 + lsrs r4,r1,#4 @ rounding + small reduction in systematic bias + bcc 2f @ skip if we are not near a rounding boundary + lsrs r1,#3 @ quotient Q25 + lsls r0,#10 @ dividend mantissa Q48 + muls r1,r1,r2 @ quotient*divisor Q48 + subs r0,r0,r1 @ remainder Q48 + bmi 2f + b 3f + +1: +@ here the quotient is at least 1<<28 (i.e., result mantissa >=1.0) + + adds r3,#1 @ bump exponent (and shift mantissa down one more place) + adds r1,#9 + lsrs r4,r1,#5 @ rounding + small reduction in systematic bias + bcc 2f @ skip if we are not near a rounding boundary + + lsrs r1,#4 @ quotient Q24 + lsls r0,#9 @ dividend mantissa Q47 + muls r1,r1,r2 @ quotient*divisor Q47 + subs r0,r0,r1 @ remainder Q47 + bmi 2f +3: + adds r4,#1 @ increment quotient as we are above the rounding boundary + +@ here +@ r3=result exponent +@ r4=correctly rounded quotient Q23 in range [1,2] *note closed interval* +@ r6b31=sign of result + +2: + cmp r3,#254 + bhs 10f @ this catches both underflow and overflow + lsls r1,r3,#23 + adds r0,r4,r1 + adds r0,r6 + pop {r4,r5,r6,r15} + +@ here divisor is infinite; dividend exponent in r3 +20: + cmp r3,#255 + bne retzero + +retinf: + movs r0,#255 +21: + lsls r0,#23 + orrs r0,r6 + pop {r4,r5,r6,r15} + +10: + bge retinf @ overflow? + adds r1,r3,#1 + bne retzero @ exponent <-1? return 0 +@ here exponent is exactly -1 + lsrs r1,r4,#25 + bcc retzero @ mantissa is not 01000000? +@ return minimum normal + movs r0,#1 + lsls r0,#23 + orrs r0,r6 + pop {r4,r5,r6,r15} + +retzero: + movs r0,r6 + pop {r4,r5,r6,r15} + +@ x2=[32:1:63]/32; +@ round(256 ./(x2+1/64)) +.align 2 +rcpapp: +.byte 252,245,237,231,224,218,213,207,202,197,193,188,184,180,176,172 +.byte 169,165,162,159,156,153,150,148,145,142,140,138,135,133,131,129 + +@ The square root routine uses an initial approximation to the reciprocal of the square root of the argument based +@ on the top four bits of the mantissa (possibly shifted one place to make the exponent even). It then performs two +@ Newton-Raphson iterations, resulting in about 14 bits of accuracy. This reciprocal is then multiplied by +@ the original argument to produce an approximation to the result, again with about 14 bits of accuracy. +@ Then a remainder is calculated, and multiplied by the reciprocal estiamte to generate a correction term +@ giving a final answer to about 28 bits of accuracy. A final remainder calculation rounds to the correct +@ result if necessary. +@ Again, the fixed-point calculation is carefully implemented to preserve accuracy, and similar comments to those +@ made above on the fast division routine apply. +@ The reciprocal square root calculation has been tested for all possible (possibly shifted) input mantissa values. +.align 2 +.thumb_func +qfp_fsqrt: + push {r4} + lsls r1,r0,#1 + bcs sq_0 @ negative? + lsls r1,#8 + lsrs r1,#9 @ mantissa + movs r2,#1 + lsls r2,#23 + adds r1,r2 @ insert implied 1 + lsrs r2,r0,#23 @ extract exponent + beq sq_2 @ zero? + cmp r2,#255 @ infinite? + beq sq_1 + adds r2,#125 @ correction for packing + asrs r2,#1 @ exponent/2, LSB into carry + bcc 1f + lsls r1,#1 @ was even: double mantissa; mantissa y now 1..4 Q23 +1: + adr r4,rsqrtapp-4@ first four table entries are never accessed because of the mantissa's leading 1 + lsrs r3,r1,#21 @ y Q2 + ldrb r4,[r4,r3] @ initial approximation to reciprocal square root a0 Q8 + + lsrs r0,r1,#7 @ y Q16: first Newton-Raphson iteration + muls r0,r4 @ a0*y Q24 + muls r0,r4 @ r0=p0=a0*y*y Q32 + asrs r0,#12 @ r0 Q20 + muls r0,r4 @ dy0=a0*r0 Q28 + asrs r0,#13 @ dy0 Q15 + lsls r4,#8 @ a0 Q16 + subs r4,r0 @ a1=a0-dy0/2 Q16-Q15/2 -> Q16 + adds r4,#170 @ mostly remove systematic error in this approximation: gains approximately 1 bit + + movs r0,r4 @ second Newton-Raphson iteration + muls r0,r0 @ a1*a1 Q32 + lsrs r0,#15 @ a1*a1 Q17 + lsrs r3,r1,#8 @ y Q15 + muls r0,r3 @ r1=p1=a1*a1*y Q32 + asrs r0,#12 @ r1 Q20 + muls r0,r4 @ dy1=a1*r1 Q36 + asrs r0,#21 @ dy1 Q15 + subs r4,r0 @ a2=a1-dy1/2 Q16-Q15/2 -> Q16 + + muls r3,r4 @ a3=y*a2 Q31 + lsrs r3,#15 @ a3 Q16 +@ here a2 is an approximation to the reciprocal square root +@ and a3 is an approximation to the square root + movs r0,r3 + muls r0,r0 @ a3*a3 Q32 + lsls r1,#9 @ y Q32 + subs r0,r1,r0 @ r2=y-a3*a3 Q32 remainder + asrs r0,#5 @ r2 Q27 + muls r4,r0 @ r2*a2 Q43 + lsls r3,#7 @ a3 Q23 + asrs r0,r4,#15 @ r2*a2 Q28 + adds r0,#16 @ rounding to Q24 + asrs r0,r0,#6 @ r2*a2 Q22 + add r3,r0 @ a4 Q23: candidate final result + bcc sq_3 @ near rounding boundary? skip if no rounding needed + mov r4,r3 + adcs r4,r4 @ a4+0.5ulp Q24 + muls r4,r4 @ Q48 + lsls r1,#16 @ y Q48 + subs r1,r4 @ remainder Q48 + bmi sq_3 + adds r3,#1 @ round up +sq_3: + lsls r2,#23 @ pack exponent + adds r0,r2,r3 +sq_6: + pop {r4} + bx r14 + +sq_0: + lsrs r1,#24 + beq sq_2 @ -0: return it +@ here negative and not -0: return -Inf + asrs r0,#31 +sq_5: + lsls r0,#23 + b sq_6 +sq_1: @ +Inf + lsrs r0,#23 + b sq_5 +sq_2: + lsrs r0,#31 + lsls r0,#31 + b sq_6 + +@ round(sqrt(2^22./[72:16:248])) +rsqrtapp: +.byte 0xf1,0xda,0xc9,0xbb, 0xb0,0xa6,0x9e,0x97, 0x91,0x8b,0x86,0x82 + +qfp_lib_end: diff --git a/Drivers/qfplib-m0-full-20240105/qfplib-port.h b/Drivers/qfplib-m0-full-20240105/qfplib-port.h new file mode 100644 index 0000000..4d57810 --- /dev/null +++ b/Drivers/qfplib-m0-full-20240105/qfplib-port.h @@ -0,0 +1,382 @@ +#include "qfplib-m0-full.h" + +float qfp_fpow(float b, float e) +{ + return qfp_fexp(qfp_fmul(e, qfp_fln(b))); +} + +float qfp_flog10(float x) +{ + extern float ln10; + return qfp_fdiv(qfp_fln(x), ln10); +} + +//__attribute__((naked, section(".data"))) +//inline float qfp_fadd_asm(float, float) +//{ +//asm(R"( +//.syntax unified +// push {r4,r5,r6,r14} +// asrs r4,r0,#31 +// lsls r2,r0,#1 +// lsrs r2,#24 @ x exponent +// beq fa_xe0 +// cmp r2,#255 +// beq fa_xe255 +//fa_xe: +// asrs r5,r1,#31 +// lsls r3,r1,#1 +// lsrs r3,#24 @ y exponent +// beq fa_ye0 +// cmp r3,#255 +// beq fa_ye255 +//fa_ye: +// ldr r6,=#0x007fffff +// ands r0,r0,r6 @ extract mantissa bits +// ands r1,r1,r6 +// adds r6,#1 @ r6=0x00800000 +// orrs r0,r0,r6 @ set implied 1 +// orrs r1,r1,r6 +// eors r0,r0,r4 @ complement... +// eors r1,r1,r5 +// subs r0,r0,r4 @ ... and add 1 if sign bit is set: 2's complement +// subs r1,r1,r5 +// subs r5,r3,r2 @ ye-xe +// subs r4,r2,r3 @ xe-ye +// bmi fa_ygtx +//@ here xe>=ye +// cmp r4,#30 +// bge fa_xmgty @ xe much greater than ye? +// adds r5,#32 +// movs r3,r2 @ save exponent +//@ here y in r1 must be shifted down r4 places to align with x in r0 +// movs r2,r1 +// lsls r2,r2,r5 @ keep the bits we will shift off the bottom of r1 +// asrs r1,r1,r4 +// b fa_0 +// +//.ltorg +// +//fa_ymgtx: +// movs r2,#0 @ result is just y +// movs r0,r1 +// b fa_1 +//fa_xmgty: +// movs r3,r2 @ result is just x +// movs r2,#0 +// b fa_1 +// +//fa_ygtx: +//@ here ye>xe +// cmp r5,#30 +// bge fa_ymgtx @ ye much greater than xe? +// adds r4,#32 +//@ here x in r0 must be shifted down r5 places to align with y in r1 +// movs r2,r0 +// lsls r2,r2,r4 @ keep the bits we will shift off the bottom of r0 +// asrs r0,r0,r5 +//fa_0: +// adds r0,r1 @ result is now in r0:r2, possibly highly denormalised or zero; exponent in r3 +// beq fa_9 @ if zero, inputs must have been of identical magnitude and opposite sign, so return +0 +//fa_1: +// lsrs r1,r0,#31 @ sign bit +// beq fa_8 +// mvns r0,r0 +// rsbs r2,r2,#0 +// bne fa_8 +// adds r0,#1 +//fa_8: +// adds r6,r6 +//@ r6=0x01000000 +// cmp r0,r6 +// bhs fa_2 +//fa_3: +// adds r2,r2 @ normalisation loop +// adcs r0,r0 +// subs r3,#1 @ adjust exponent +// cmp r0,r6 +// blo fa_3 +//fa_2: +//@ here r0:r2 is the result mantissa 0x01000000<=r0<0x02000000, r3 the exponent, and r1 the sign bit +// lsrs r0,#1 +// bcc fa_4 +//@ rounding bits here are 1:r2 +// adds r0,#1 @ round up +// cmp r2,#0 +// beq fa_5 @ sticky bits all zero? +//fa_4: +// cmp r3,#254 +// bhs fa_6 @ exponent too large or negative? +// lsls r1,#31 @ pack everything +// add r0,r1 +// lsls r3,#23 +// add r0,r3 +//fa_end: +// pop {r4,r5,r6,r15} +// +//fa_9: +// cmp r2,#0 @ result zero? +// beq fa_end @ return +0 +// b fa_1 +// +//fa_5: +// lsrs r0,#1 +// lsls r0,#1 @ round to even +// b fa_4 +// +//fa_6: +// bge fa_7 +//@ underflow +//@ can handle denormals here +// lsls r0,r1,#31 @ result is signed zero +// pop {r4,r5,r6,r15} +//fa_7: +//@ overflow +// lsls r0,r1,#8 +// adds r0,#255 +// lsls r0,#23 @ result is signed infinity +// pop {r4,r5,r6,r15} +// +// +//fa_xe0: +//@ can handle denormals here +// subs r2,#32 +// adds r2,r4 @ exponent -32 for +Inf, -33 for -Inf +// b fa_xe +// +//fa_xe255: +//@ can handle NaNs here +// lsls r2,#8 +// add r2,r2,r4 @ exponent ~64k for +Inf, ~64k-1 for -Inf +// b fa_xe +// +//fa_ye0: +//@ can handle denormals here +// subs r3,#32 +// adds r3,r5 @ exponent -32 for +Inf, -33 for -Inf +// b fa_ye +// +//fa_ye255: +//@ can handle NaNs here +// lsls r3,#8 +// add r3,r3,r5 @ exponent ~64k for +Inf, ~64k-1 for -Inf +// b fa_ye +// )"); +//} +// +//__attribute__((naked, section(".data"))) +//inline float qfp_fmul_asm(float, float) +//{ +//asm(R"( +//.syntax unified +// push {r7,r14} +// mov r2,r0 +// eors r2,r1 @ sign of result +// lsrs r2,#31 +// lsls r2,#31 +// mov r14,r2 +// lsls r0,#1 +// lsls r1,#1 +// lsrs r2,r0,#24 @ xe +// beq fm_xe0 +// cmp r2,#255 +// beq fm_xe255 +//fm_xe: +// lsrs r3,r1,#24 @ ye +// beq fm_ye0 +// cmp r3,#255 +// beq fm_ye255 +//fm_ye: +// adds r7,r2,r3 @ exponent of result (will possibly be incremented) +// subs r7,#128 @ adjust bias for packing +// lsls r0,#8 @ x mantissa +// lsls r1,#8 @ y mantissa +// lsrs r0,#9 +// lsrs r1,#9 +// +// adds r2,r0,r1 @ for later +// mov r12,r2 +// lsrs r2,r0,#7 @ x[22..7] Q16 +// lsrs r3,r1,#7 @ y[22..7] Q16 +// muls r2,r2,r3 @ result [45..14] Q32: never an overestimate and worst case error is 2*(2^7-1)*(2^23-2^7)+(2^7-1)^2 = 2130690049 < 2^31 +// muls r0,r0,r1 @ result [31..0] Q46 +// lsrs r2,#18 @ result [45..32] Q14 +// bcc 1f +// cmp r0,#0 +// bmi 1f +// adds r2,#1 @ fix error in r2 +//1: +// lsls r3,r0,#9 @ bits off bottom of result +// lsrs r0,#23 @ Q23 +// lsls r2,#9 +// adds r0,r2 @ cut'n'shut +// add r0,r12 @ implied 1*(x+y) to compensate for no insertion of implied 1s +//@ result-1 in r3:r0 Q23+32, i.e., in range [0,3) +// +// lsrs r1,r0,#23 +// bne fm_0 @ branch if we need to shift down one place +//@ here 1<=result<2 +// cmp r7,#254 +// bhs fm_3a @ catches both underflow and overflow +// lsls r3,#1 @ sticky bits at top of R3, rounding bit in carry +// bcc fm_1 @ no rounding +// beq fm_2 @ rounding tie? +// adds r0,#1 @ round up +//fm_1: +// adds r7,#1 @ for implied 1 +// lsls r7,#23 @ pack result +// add r0,r7 +// add r0,r14 +// pop {r7,r15} +//fm_2: @ rounding tie +// adds r0,#1 +//fm_3: +// lsrs r0,#1 +// lsls r0,#1 @ clear bottom bit +// b fm_1 +// +//@ here 1<=result-1<3 +//fm_0: +// adds r7,#1 @ increment exponent +// cmp r7,#254 +// bhs fm_3b @ catches both underflow and overflow +// lsrs r0,#1 @ shift mantissa down +// bcc fm_1a @ no rounding +// adds r0,#1 @ assume we will round up +// cmp r3,#0 @ sticky bits +// beq fm_3c @ rounding tie? +//fm_1a: +// adds r7,r7 +// adds r7,#1 @ for implied 1 +// lsls r7,#22 @ pack result +// add r0,r7 +// add r0,r14 +// pop {r7,r15} +// +//fm_3c: +// lsrs r0,#1 +// lsls r0,#1 @ clear bottom bit +// b fm_1a +// +//fm_xe0: +// subs r2,#16 +//fm_xe255: +// lsls r2,#8 +// b fm_xe +//fm_ye0: +// subs r3,#16 +//fm_ye255: +// lsls r3,#8 +// b fm_ye +// +//@ here the result is under- or overflowing +//fm_3b: +// bge fm_4 @ branch on overflow +//@ trap case where result is denormal 0x007fffff + 0.5ulp or more +// adds r7,#1 @ exponent=-1? +// bne fm_5 +//@ corrected mantissa will be >= 3.FFFFFC (0x1fffffe Q23) +//@ so r0 >= 2.FFFFFC (0x17ffffe Q23) +// adds r0,#2 +// lsrs r0,#23 +// cmp r0,#3 +// bne fm_5 +// b fm_6 +// +//fm_3a: +// bge fm_4 @ branch on overflow +//@ trap case where result is denormal 0x007fffff + 0.5ulp or more +// adds r7,#1 @ exponent=-1? +// bne fm_5 +// adds r0,#1 @ mantissa=0xffffff (i.e., r0=0x7fffff)? +// lsrs r0,#23 +// beq fm_5 +//fm_6: +// movs r0,#1 @ return smallest normal +// lsls r0,#23 +// add r0,r14 +// pop {r7,r15} +// +//fm_5: +// mov r0,r14 +// pop {r7,r15} +//fm_4: +// movs r0,#0xff +// lsls r0,#23 +// add r0,r14 +// pop {r7,r15} +//)"); +//} +// +//__attribute__((naked, section(".data"))) +//inline float qfp_int2float_asm(int) +//{ +//asm(R"( +//.syntax unified +// movs r1,#0 @ fall through +// push {r4,r5,r6,r14} +// movs r2,#29 +// subs r2,r1 @ fix exponent +// movs r5,#0 +// bl qfp_int2float_packx +// pop {r4,r5,r6,r15} +//qfp_int2float_packx: +// lsrs r4,r0,#31 @ save sign bit +// lsls r4,r4,#31 @ sign now in b31 +// bpl 2f @ skip if positive +// cmp r5,#0 +// beq 11f +// adds r0,#1 @ fiddle carry in to following rsb if sticky bits are non-zero +//11: +// rsbs r0,#0 @ can now treat r0 as unsigned +// bmi 3f @ catch r0=0x80000000 case +//2: +// subs r2,#1 @ normalisation loop +// adds r0,r0 +// beq 1f @ zero? special case +// bpl 2b @ normalise so leading "1" in bit 31 +//3: +// adds r2,#129 @ (mis-)offset exponent +// bne 12f @ special case: highest denormal can round to lowest normal +// adds r0,#0x80 @ in special case, need to add 256 to r0 for rounding +// bcs 4f @ tripped carry? then have leading 1 in C as required +//12: +// adds r0,#0x80 @ rounding +// bcs 4f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits) +// cmp r5,#0 +// beq 7f @ sticky bits zero? +//8: +// lsls r0,#1 @ remove leading 1 +//9: +// subs r2,#1 @ compensate exponent on this path +//4: +// cmp r2,#254 +// bge 5f @ overflow? +// adds r2,#1 @ correct exponent offset +// ble 10f @ denormal/underflow? +// lsrs r0,#9 @ align mantissa +// lsls r2,#23 @ align exponent +// orrs r0,r2 @ assemble exponent and mantissa +//6: +// orrs r0,r4 @ apply sign +//1: +// bx r14 +// +//5: +// movs r0,#0xff @ create infinity +// lsls r0,#23 +// b 6b +// +//10: +// movs r0,#0 @ create zero +// bx r14 +// +//7: @ sticky bit rounding case +// lsls r5,r0,#24 @ check bottom 8 bits of r0 +// bne 8b @ in rounding-tie case? +// lsrs r0,#9 @ ensure even result +// lsls r0,#10 +// b 9b +//)"); +//} + diff --git a/Makefile b/Makefile index c27f09e..49efb5e 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ TARGET = microphone # debug build? DEBUG = 1 # optimization -OPT = -Og +OPT = -O2 ####################################### @@ -61,7 +61,8 @@ Core/Src/syscalls.c # ASM sources ASM_SOURCES = \ -startup_stm32u083xx.s +startup_stm32u083xx.s \ +Drivers/qfplib-m0-full-20240105/qfplib-m0-full.s # ASM sources ASMM_SOURCES = @@ -121,7 +122,8 @@ C_INCLUDES = \ -IDrivers/STM32U0xx_HAL_Driver/Inc \ -IDrivers/STM32U0xx_HAL_Driver/Inc/Legacy \ -IDrivers/CMSIS/Device/ST/STM32U0xx/Include \ --IDrivers/CMSIS/Include +-IDrivers/CMSIS/Include \ +-IDrivers/qfplib-m0-full-20240105 # compile gcc flags