#include "qfplib-m0-full.h" __RAM_FUNC float qfp_fpow(float b, float e) { return qfp_fexp(qfp_fmul(e, qfp_fln(b))); } __RAM_FUNC float qfp_flog10(float x) { extern float ln10; return qfp_fdiv(qfp_fln(x), ln10); } //__attribute__((naked, section(".data"))) //inline float qfp_fadd_asm(float, float) //{ //asm(R"( //.syntax unified // push {r4,r5,r6,r14} // asrs r4,r0,#31 // lsls r2,r0,#1 // lsrs r2,#24 @ x exponent // beq fa_xe0 // cmp r2,#255 // beq fa_xe255 //fa_xe: // asrs r5,r1,#31 // lsls r3,r1,#1 // lsrs r3,#24 @ y exponent // beq fa_ye0 // cmp r3,#255 // beq fa_ye255 //fa_ye: // ldr r6,=#0x007fffff // ands r0,r0,r6 @ extract mantissa bits // ands r1,r1,r6 // adds r6,#1 @ r6=0x00800000 // orrs r0,r0,r6 @ set implied 1 // orrs r1,r1,r6 // eors r0,r0,r4 @ complement... // eors r1,r1,r5 // subs r0,r0,r4 @ ... and add 1 if sign bit is set: 2's complement // subs r1,r1,r5 // subs r5,r3,r2 @ ye-xe // subs r4,r2,r3 @ xe-ye // bmi fa_ygtx //@ here xe>=ye // cmp r4,#30 // bge fa_xmgty @ xe much greater than ye? // adds r5,#32 // movs r3,r2 @ save exponent //@ here y in r1 must be shifted down r4 places to align with x in r0 // movs r2,r1 // lsls r2,r2,r5 @ keep the bits we will shift off the bottom of r1 // asrs r1,r1,r4 // b fa_0 // //.ltorg // //fa_ymgtx: // movs r2,#0 @ result is just y // movs r0,r1 // b fa_1 //fa_xmgty: // movs r3,r2 @ result is just x // movs r2,#0 // b fa_1 // //fa_ygtx: //@ here ye>xe // cmp r5,#30 // bge fa_ymgtx @ ye much greater than xe? // adds r4,#32 //@ here x in r0 must be shifted down r5 places to align with y in r1 // movs r2,r0 // lsls r2,r2,r4 @ keep the bits we will shift off the bottom of r0 // asrs r0,r0,r5 //fa_0: // adds r0,r1 @ result is now in r0:r2, possibly highly denormalised or zero; exponent in r3 // beq fa_9 @ if zero, inputs must have been of identical magnitude and opposite sign, so return +0 //fa_1: // lsrs r1,r0,#31 @ sign bit // beq fa_8 // mvns r0,r0 // rsbs r2,r2,#0 // bne fa_8 // adds r0,#1 //fa_8: // adds r6,r6 //@ r6=0x01000000 // cmp r0,r6 // bhs fa_2 //fa_3: // adds r2,r2 @ normalisation loop // adcs r0,r0 // subs r3,#1 @ adjust exponent // cmp r0,r6 // blo fa_3 //fa_2: //@ here r0:r2 is the result mantissa 0x01000000<=r0<0x02000000, r3 the exponent, and r1 the sign bit // lsrs r0,#1 // bcc fa_4 //@ rounding bits here are 1:r2 // adds r0,#1 @ round up // cmp r2,#0 // beq fa_5 @ sticky bits all zero? //fa_4: // cmp r3,#254 // bhs fa_6 @ exponent too large or negative? // lsls r1,#31 @ pack everything // add r0,r1 // lsls r3,#23 // add r0,r3 //fa_end: // pop {r4,r5,r6,r15} // //fa_9: // cmp r2,#0 @ result zero? // beq fa_end @ return +0 // b fa_1 // //fa_5: // lsrs r0,#1 // lsls r0,#1 @ round to even // b fa_4 // //fa_6: // bge fa_7 //@ underflow //@ can handle denormals here // lsls r0,r1,#31 @ result is signed zero // pop {r4,r5,r6,r15} //fa_7: //@ overflow // lsls r0,r1,#8 // adds r0,#255 // lsls r0,#23 @ result is signed infinity // pop {r4,r5,r6,r15} // // //fa_xe0: //@ can handle denormals here // subs r2,#32 // adds r2,r4 @ exponent -32 for +Inf, -33 for -Inf // b fa_xe // //fa_xe255: //@ can handle NaNs here // lsls r2,#8 // add r2,r2,r4 @ exponent ~64k for +Inf, ~64k-1 for -Inf // b fa_xe // //fa_ye0: //@ can handle denormals here // subs r3,#32 // adds r3,r5 @ exponent -32 for +Inf, -33 for -Inf // b fa_ye // //fa_ye255: //@ can handle NaNs here // lsls r3,#8 // add r3,r3,r5 @ exponent ~64k for +Inf, ~64k-1 for -Inf // b fa_ye // )"); //} // //__attribute__((naked, section(".data"))) //inline float qfp_fmul_asm(float, float) //{ //asm(R"( //.syntax unified // push {r7,r14} // mov r2,r0 // eors r2,r1 @ sign of result // lsrs r2,#31 // lsls r2,#31 // mov r14,r2 // lsls r0,#1 // lsls r1,#1 // lsrs r2,r0,#24 @ xe // beq fm_xe0 // cmp r2,#255 // beq fm_xe255 //fm_xe: // lsrs r3,r1,#24 @ ye // beq fm_ye0 // cmp r3,#255 // beq fm_ye255 //fm_ye: // adds r7,r2,r3 @ exponent of result (will possibly be incremented) // subs r7,#128 @ adjust bias for packing // lsls r0,#8 @ x mantissa // lsls r1,#8 @ y mantissa // lsrs r0,#9 // lsrs r1,#9 // // adds r2,r0,r1 @ for later // mov r12,r2 // lsrs r2,r0,#7 @ x[22..7] Q16 // lsrs r3,r1,#7 @ y[22..7] Q16 // muls r2,r2,r3 @ result [45..14] Q32: never an overestimate and worst case error is 2*(2^7-1)*(2^23-2^7)+(2^7-1)^2 = 2130690049 < 2^31 // muls r0,r0,r1 @ result [31..0] Q46 // lsrs r2,#18 @ result [45..32] Q14 // bcc 1f // cmp r0,#0 // bmi 1f // adds r2,#1 @ fix error in r2 //1: // lsls r3,r0,#9 @ bits off bottom of result // lsrs r0,#23 @ Q23 // lsls r2,#9 // adds r0,r2 @ cut'n'shut // add r0,r12 @ implied 1*(x+y) to compensate for no insertion of implied 1s //@ result-1 in r3:r0 Q23+32, i.e., in range [0,3) // // lsrs r1,r0,#23 // bne fm_0 @ branch if we need to shift down one place //@ here 1<=result<2 // cmp r7,#254 // bhs fm_3a @ catches both underflow and overflow // lsls r3,#1 @ sticky bits at top of R3, rounding bit in carry // bcc fm_1 @ no rounding // beq fm_2 @ rounding tie? // adds r0,#1 @ round up //fm_1: // adds r7,#1 @ for implied 1 // lsls r7,#23 @ pack result // add r0,r7 // add r0,r14 // pop {r7,r15} //fm_2: @ rounding tie // adds r0,#1 //fm_3: // lsrs r0,#1 // lsls r0,#1 @ clear bottom bit // b fm_1 // //@ here 1<=result-1<3 //fm_0: // adds r7,#1 @ increment exponent // cmp r7,#254 // bhs fm_3b @ catches both underflow and overflow // lsrs r0,#1 @ shift mantissa down // bcc fm_1a @ no rounding // adds r0,#1 @ assume we will round up // cmp r3,#0 @ sticky bits // beq fm_3c @ rounding tie? //fm_1a: // adds r7,r7 // adds r7,#1 @ for implied 1 // lsls r7,#22 @ pack result // add r0,r7 // add r0,r14 // pop {r7,r15} // //fm_3c: // lsrs r0,#1 // lsls r0,#1 @ clear bottom bit // b fm_1a // //fm_xe0: // subs r2,#16 //fm_xe255: // lsls r2,#8 // b fm_xe //fm_ye0: // subs r3,#16 //fm_ye255: // lsls r3,#8 // b fm_ye // //@ here the result is under- or overflowing //fm_3b: // bge fm_4 @ branch on overflow //@ trap case where result is denormal 0x007fffff + 0.5ulp or more // adds r7,#1 @ exponent=-1? // bne fm_5 //@ corrected mantissa will be >= 3.FFFFFC (0x1fffffe Q23) //@ so r0 >= 2.FFFFFC (0x17ffffe Q23) // adds r0,#2 // lsrs r0,#23 // cmp r0,#3 // bne fm_5 // b fm_6 // //fm_3a: // bge fm_4 @ branch on overflow //@ trap case where result is denormal 0x007fffff + 0.5ulp or more // adds r7,#1 @ exponent=-1? // bne fm_5 // adds r0,#1 @ mantissa=0xffffff (i.e., r0=0x7fffff)? // lsrs r0,#23 // beq fm_5 //fm_6: // movs r0,#1 @ return smallest normal // lsls r0,#23 // add r0,r14 // pop {r7,r15} // //fm_5: // mov r0,r14 // pop {r7,r15} //fm_4: // movs r0,#0xff // lsls r0,#23 // add r0,r14 // pop {r7,r15} //)"); //} // //__attribute__((naked, section(".data"))) //inline float qfp_int2float_asm(int) //{ //asm(R"( //.syntax unified // movs r1,#0 @ fall through // push {r4,r5,r6,r14} // movs r2,#29 // subs r2,r1 @ fix exponent // movs r5,#0 // bl qfp_int2float_packx // pop {r4,r5,r6,r15} //qfp_int2float_packx: // lsrs r4,r0,#31 @ save sign bit // lsls r4,r4,#31 @ sign now in b31 // bpl 2f @ skip if positive // cmp r5,#0 // beq 11f // adds r0,#1 @ fiddle carry in to following rsb if sticky bits are non-zero //11: // rsbs r0,#0 @ can now treat r0 as unsigned // bmi 3f @ catch r0=0x80000000 case //2: // subs r2,#1 @ normalisation loop // adds r0,r0 // beq 1f @ zero? special case // bpl 2b @ normalise so leading "1" in bit 31 //3: // adds r2,#129 @ (mis-)offset exponent // bne 12f @ special case: highest denormal can round to lowest normal // adds r0,#0x80 @ in special case, need to add 256 to r0 for rounding // bcs 4f @ tripped carry? then have leading 1 in C as required //12: // adds r0,#0x80 @ rounding // bcs 4f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits) // cmp r5,#0 // beq 7f @ sticky bits zero? //8: // lsls r0,#1 @ remove leading 1 //9: // subs r2,#1 @ compensate exponent on this path //4: // cmp r2,#254 // bge 5f @ overflow? // adds r2,#1 @ correct exponent offset // ble 10f @ denormal/underflow? // lsrs r0,#9 @ align mantissa // lsls r2,#23 @ align exponent // orrs r0,r2 @ assemble exponent and mantissa //6: // orrs r0,r4 @ apply sign //1: // bx r14 // //5: // movs r0,#0xff @ create infinity // lsls r0,#23 // b 6b // //10: // movs r0,#0 @ create zero // bx r14 // //7: @ sticky bit rounding case // lsls r5,r0,#24 @ check bottom 8 bits of r0 // bne 8b @ in rounding-tie case? // lsrs r0,#9 @ ensure even result // lsls r0,#10 // b 9b //)"); //}