diff options
Diffstat (limited to 'Drivers/qfplib-m0-full-20240105/qfplib-port.h')
-rw-r--r-- | Drivers/qfplib-m0-full-20240105/qfplib-port.h | 382 |
1 files changed, 382 insertions, 0 deletions
diff --git a/Drivers/qfplib-m0-full-20240105/qfplib-port.h b/Drivers/qfplib-m0-full-20240105/qfplib-port.h new file mode 100644 index 0000000..4d57810 --- /dev/null +++ b/Drivers/qfplib-m0-full-20240105/qfplib-port.h @@ -0,0 +1,382 @@ +#include "qfplib-m0-full.h" + +float qfp_fpow(float b, float e) +{ + return qfp_fexp(qfp_fmul(e, qfp_fln(b))); +} + +float qfp_flog10(float x) +{ + extern float ln10; + return qfp_fdiv(qfp_fln(x), ln10); +} + +//__attribute__((naked, section(".data"))) +//inline float qfp_fadd_asm(float, float) +//{ +//asm(R"( +//.syntax unified +// push {r4,r5,r6,r14} +// asrs r4,r0,#31 +// lsls r2,r0,#1 +// lsrs r2,#24 @ x exponent +// beq fa_xe0 +// cmp r2,#255 +// beq fa_xe255 +//fa_xe: +// asrs r5,r1,#31 +// lsls r3,r1,#1 +// lsrs r3,#24 @ y exponent +// beq fa_ye0 +// cmp r3,#255 +// beq fa_ye255 +//fa_ye: +// ldr r6,=#0x007fffff +// ands r0,r0,r6 @ extract mantissa bits +// ands r1,r1,r6 +// adds r6,#1 @ r6=0x00800000 +// orrs r0,r0,r6 @ set implied 1 +// orrs r1,r1,r6 +// eors r0,r0,r4 @ complement... +// eors r1,r1,r5 +// subs r0,r0,r4 @ ... and add 1 if sign bit is set: 2's complement +// subs r1,r1,r5 +// subs r5,r3,r2 @ ye-xe +// subs r4,r2,r3 @ xe-ye +// bmi fa_ygtx +//@ here xe>=ye +// cmp r4,#30 +// bge fa_xmgty @ xe much greater than ye? +// adds r5,#32 +// movs r3,r2 @ save exponent +//@ here y in r1 must be shifted down r4 places to align with x in r0 +// movs r2,r1 +// lsls r2,r2,r5 @ keep the bits we will shift off the bottom of r1 +// asrs r1,r1,r4 +// b fa_0 +// +//.ltorg +// +//fa_ymgtx: +// movs r2,#0 @ result is just y +// movs r0,r1 +// b fa_1 +//fa_xmgty: +// movs r3,r2 @ result is just x +// movs r2,#0 +// b fa_1 +// +//fa_ygtx: +//@ here ye>xe +// cmp r5,#30 +// bge fa_ymgtx @ ye much greater than xe? +// adds r4,#32 +//@ here x in r0 must be shifted down r5 places to align with y in r1 +// movs r2,r0 +// lsls r2,r2,r4 @ keep the bits we will shift off the bottom of r0 +// asrs r0,r0,r5 +//fa_0: +// adds r0,r1 @ result is now in r0:r2, possibly highly denormalised or zero; exponent in r3 +// beq fa_9 @ if zero, inputs must have been of identical magnitude and opposite sign, so return +0 +//fa_1: +// lsrs r1,r0,#31 @ sign bit +// beq fa_8 +// mvns r0,r0 +// rsbs r2,r2,#0 +// bne fa_8 +// adds r0,#1 +//fa_8: +// adds r6,r6 +//@ r6=0x01000000 +// cmp r0,r6 +// bhs fa_2 +//fa_3: +// adds r2,r2 @ normalisation loop +// adcs r0,r0 +// subs r3,#1 @ adjust exponent +// cmp r0,r6 +// blo fa_3 +//fa_2: +//@ here r0:r2 is the result mantissa 0x01000000<=r0<0x02000000, r3 the exponent, and r1 the sign bit +// lsrs r0,#1 +// bcc fa_4 +//@ rounding bits here are 1:r2 +// adds r0,#1 @ round up +// cmp r2,#0 +// beq fa_5 @ sticky bits all zero? +//fa_4: +// cmp r3,#254 +// bhs fa_6 @ exponent too large or negative? +// lsls r1,#31 @ pack everything +// add r0,r1 +// lsls r3,#23 +// add r0,r3 +//fa_end: +// pop {r4,r5,r6,r15} +// +//fa_9: +// cmp r2,#0 @ result zero? +// beq fa_end @ return +0 +// b fa_1 +// +//fa_5: +// lsrs r0,#1 +// lsls r0,#1 @ round to even +// b fa_4 +// +//fa_6: +// bge fa_7 +//@ underflow +//@ can handle denormals here +// lsls r0,r1,#31 @ result is signed zero +// pop {r4,r5,r6,r15} +//fa_7: +//@ overflow +// lsls r0,r1,#8 +// adds r0,#255 +// lsls r0,#23 @ result is signed infinity +// pop {r4,r5,r6,r15} +// +// +//fa_xe0: +//@ can handle denormals here +// subs r2,#32 +// adds r2,r4 @ exponent -32 for +Inf, -33 for -Inf +// b fa_xe +// +//fa_xe255: +//@ can handle NaNs here +// lsls r2,#8 +// add r2,r2,r4 @ exponent ~64k for +Inf, ~64k-1 for -Inf +// b fa_xe +// +//fa_ye0: +//@ can handle denormals here +// subs r3,#32 +// adds r3,r5 @ exponent -32 for +Inf, -33 for -Inf +// b fa_ye +// +//fa_ye255: +//@ can handle NaNs here +// lsls r3,#8 +// add r3,r3,r5 @ exponent ~64k for +Inf, ~64k-1 for -Inf +// b fa_ye +// )"); +//} +// +//__attribute__((naked, section(".data"))) +//inline float qfp_fmul_asm(float, float) +//{ +//asm(R"( +//.syntax unified +// push {r7,r14} +// mov r2,r0 +// eors r2,r1 @ sign of result +// lsrs r2,#31 +// lsls r2,#31 +// mov r14,r2 +// lsls r0,#1 +// lsls r1,#1 +// lsrs r2,r0,#24 @ xe +// beq fm_xe0 +// cmp r2,#255 +// beq fm_xe255 +//fm_xe: +// lsrs r3,r1,#24 @ ye +// beq fm_ye0 +// cmp r3,#255 +// beq fm_ye255 +//fm_ye: +// adds r7,r2,r3 @ exponent of result (will possibly be incremented) +// subs r7,#128 @ adjust bias for packing +// lsls r0,#8 @ x mantissa +// lsls r1,#8 @ y mantissa +// lsrs r0,#9 +// lsrs r1,#9 +// +// adds r2,r0,r1 @ for later +// mov r12,r2 +// lsrs r2,r0,#7 @ x[22..7] Q16 +// lsrs r3,r1,#7 @ y[22..7] Q16 +// muls r2,r2,r3 @ result [45..14] Q32: never an overestimate and worst case error is 2*(2^7-1)*(2^23-2^7)+(2^7-1)^2 = 2130690049 < 2^31 +// muls r0,r0,r1 @ result [31..0] Q46 +// lsrs r2,#18 @ result [45..32] Q14 +// bcc 1f +// cmp r0,#0 +// bmi 1f +// adds r2,#1 @ fix error in r2 +//1: +// lsls r3,r0,#9 @ bits off bottom of result +// lsrs r0,#23 @ Q23 +// lsls r2,#9 +// adds r0,r2 @ cut'n'shut +// add r0,r12 @ implied 1*(x+y) to compensate for no insertion of implied 1s +//@ result-1 in r3:r0 Q23+32, i.e., in range [0,3) +// +// lsrs r1,r0,#23 +// bne fm_0 @ branch if we need to shift down one place +//@ here 1<=result<2 +// cmp r7,#254 +// bhs fm_3a @ catches both underflow and overflow +// lsls r3,#1 @ sticky bits at top of R3, rounding bit in carry +// bcc fm_1 @ no rounding +// beq fm_2 @ rounding tie? +// adds r0,#1 @ round up +//fm_1: +// adds r7,#1 @ for implied 1 +// lsls r7,#23 @ pack result +// add r0,r7 +// add r0,r14 +// pop {r7,r15} +//fm_2: @ rounding tie +// adds r0,#1 +//fm_3: +// lsrs r0,#1 +// lsls r0,#1 @ clear bottom bit +// b fm_1 +// +//@ here 1<=result-1<3 +//fm_0: +// adds r7,#1 @ increment exponent +// cmp r7,#254 +// bhs fm_3b @ catches both underflow and overflow +// lsrs r0,#1 @ shift mantissa down +// bcc fm_1a @ no rounding +// adds r0,#1 @ assume we will round up +// cmp r3,#0 @ sticky bits +// beq fm_3c @ rounding tie? +//fm_1a: +// adds r7,r7 +// adds r7,#1 @ for implied 1 +// lsls r7,#22 @ pack result +// add r0,r7 +// add r0,r14 +// pop {r7,r15} +// +//fm_3c: +// lsrs r0,#1 +// lsls r0,#1 @ clear bottom bit +// b fm_1a +// +//fm_xe0: +// subs r2,#16 +//fm_xe255: +// lsls r2,#8 +// b fm_xe +//fm_ye0: +// subs r3,#16 +//fm_ye255: +// lsls r3,#8 +// b fm_ye +// +//@ here the result is under- or overflowing +//fm_3b: +// bge fm_4 @ branch on overflow +//@ trap case where result is denormal 0x007fffff + 0.5ulp or more +// adds r7,#1 @ exponent=-1? +// bne fm_5 +//@ corrected mantissa will be >= 3.FFFFFC (0x1fffffe Q23) +//@ so r0 >= 2.FFFFFC (0x17ffffe Q23) +// adds r0,#2 +// lsrs r0,#23 +// cmp r0,#3 +// bne fm_5 +// b fm_6 +// +//fm_3a: +// bge fm_4 @ branch on overflow +//@ trap case where result is denormal 0x007fffff + 0.5ulp or more +// adds r7,#1 @ exponent=-1? +// bne fm_5 +// adds r0,#1 @ mantissa=0xffffff (i.e., r0=0x7fffff)? +// lsrs r0,#23 +// beq fm_5 +//fm_6: +// movs r0,#1 @ return smallest normal +// lsls r0,#23 +// add r0,r14 +// pop {r7,r15} +// +//fm_5: +// mov r0,r14 +// pop {r7,r15} +//fm_4: +// movs r0,#0xff +// lsls r0,#23 +// add r0,r14 +// pop {r7,r15} +//)"); +//} +// +//__attribute__((naked, section(".data"))) +//inline float qfp_int2float_asm(int) +//{ +//asm(R"( +//.syntax unified +// movs r1,#0 @ fall through +// push {r4,r5,r6,r14} +// movs r2,#29 +// subs r2,r1 @ fix exponent +// movs r5,#0 +// bl qfp_int2float_packx +// pop {r4,r5,r6,r15} +//qfp_int2float_packx: +// lsrs r4,r0,#31 @ save sign bit +// lsls r4,r4,#31 @ sign now in b31 +// bpl 2f @ skip if positive +// cmp r5,#0 +// beq 11f +// adds r0,#1 @ fiddle carry in to following rsb if sticky bits are non-zero +//11: +// rsbs r0,#0 @ can now treat r0 as unsigned +// bmi 3f @ catch r0=0x80000000 case +//2: +// subs r2,#1 @ normalisation loop +// adds r0,r0 +// beq 1f @ zero? special case +// bpl 2b @ normalise so leading "1" in bit 31 +//3: +// adds r2,#129 @ (mis-)offset exponent +// bne 12f @ special case: highest denormal can round to lowest normal +// adds r0,#0x80 @ in special case, need to add 256 to r0 for rounding +// bcs 4f @ tripped carry? then have leading 1 in C as required +//12: +// adds r0,#0x80 @ rounding +// bcs 4f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits) +// cmp r5,#0 +// beq 7f @ sticky bits zero? +//8: +// lsls r0,#1 @ remove leading 1 +//9: +// subs r2,#1 @ compensate exponent on this path +//4: +// cmp r2,#254 +// bge 5f @ overflow? +// adds r2,#1 @ correct exponent offset +// ble 10f @ denormal/underflow? +// lsrs r0,#9 @ align mantissa +// lsls r2,#23 @ align exponent +// orrs r0,r2 @ assemble exponent and mantissa +//6: +// orrs r0,r4 @ apply sign +//1: +// bx r14 +// +//5: +// movs r0,#0xff @ create infinity +// lsls r0,#23 +// b 6b +// +//10: +// movs r0,#0 @ create zero +// bx r14 +// +//7: @ sticky bit rounding case +// lsls r5,r0,#24 @ check bottom 8 bits of r0 +// bne 8b @ in rounding-tie case? +// lsrs r0,#9 @ ensure even result +// lsls r0,#10 +// b 9b +//)"); +//} + |