go os-less; filter in sram; consider dynamic freq

main
Clyne 3 months ago
parent 47cec84be1
commit 7fad01e939
Signed by: clyne
GPG Key ID: 3267C8EBF3F9AFC7

@ -5,7 +5,7 @@
# Compiler options here.
ifeq ($(USE_OPT),)
USE_OPT = -O3 -ggdb -fomit-frame-pointer -falign-functions=16 -fno-stack-protector
USE_OPT = -O3 -fomit-frame-pointer -falign-functions=16 -fno-stack-protector
endif
# C specific options here (added to USE_OPT).
@ -102,10 +102,11 @@ include $(CHIBIOS)/os/common/startup/ARMCMx/compilers/GCC/mk/startup_stm32g0xx.m
include $(CHIBIOS)/os/hal/hal.mk
include $(CHIBIOS)/os/hal/ports/STM32/STM32G0xx/platform.mk
#include $(CHIBIOS)/os/hal/boards/ST_NUCLEO64_G071RB/board.mk
include $(CHIBIOS)/os/hal/osal/rt-nil/osal.mk
#include $(CHIBIOS)/os/hal/osal/rt-nil/osal.mk
include $(CHIBIOS)/os/hal/osal/os-less/ARMCMx/osal.mk
# RTOS files (optional).
include $(CHIBIOS)/os/nil/nil.mk
include $(CHIBIOS)/os/common/ports/ARMv6-M/compilers/GCC/mk/port.mk
#include $(CHIBIOS)/os/nil/nil.mk
#include $(CHIBIOS)/os/common/ports/ARMv6-M/compilers/GCC/mk/port.mk
## Auto-build files in ./source recursively.
#include $(CHIBIOS)/tools/mk/autobuild.mk
## Other files (optional).

@ -27,7 +27,7 @@ MEMORY
flash5 (rx) : org = 0x00000000, len = 0
flash6 (rx) : org = 0x00000000, len = 0
flash7 (rx) : org = 0x00000000, len = 0
ram0 (wx) : org = 0x20000000, len = 8k
ram0 (rwx) : org = 0x20000000, len = 8k
ram1 (wx) : org = 0x00000000, len = 0
ram2 (wx) : org = 0x00000000, len = 0
ram3 (wx) : org = 0x00000000, len = 0
@ -79,7 +79,7 @@ REGION_ALIAS("DATA_RAM_LMA", flash0);
REGION_ALIAS("BSS_RAM", ram0);
/* RAM region to be used for the default heap.*/
REGION_ALIAS("HEAP_RAM", ram0);
REGION_ALIAS("HEAP_RAM", ram1);
/* Generic rules inclusion.*/
INCLUDE rules.ld

@ -48,7 +48,7 @@
* (0..CH_CFG_MAX_THREADS-1).
*/
#if !defined(CH_CFG_MAX_THREADS)
#define CH_CFG_MAX_THREADS 2
#define CH_CFG_MAX_THREADS 1
#endif
/**

@ -15,10 +15,10 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "hal.h"
#include "ch.h"
#include "sos-iir-filter.h"
#include <algorithm>
#include <atomic>
#include <array>
#include <cstring>
#include <ranges>
@ -40,13 +40,11 @@ static constexpr unsigned I2S_STRIDE = 16;
static const auto MIC_REF_AMPL = sos_t((1 << (MIC_BITS - 1)) - 1) *
qfp_fpow(10.f, MIC_SENSITIVITY / 20.f);
static SEMAPHORE_DECL(i2sReady, 0);
static THD_WORKING_AREA(waThread1, 128);
static std::atomic_bool i2sReady;
static std::array<uint32_t, I2S_BUFSIZ> i2sBuffer;
static sos_t Leq_sum_sqr (0.f);
static unsigned Leq_samples = 0;
static THD_FUNCTION(Thread1, arg);
static void i2sCallback(I2SDriver *i2s);
static constexpr unsigned I2SPRval = 16'000'000 / SAMPLE_RATE / 32 / 2;
@ -62,23 +60,35 @@ static constexpr I2SConfig i2sConfig = {
/* I2SPR */ (I2SPRval / 2) | ((I2SPRval & 1) ? SPI_I2SPR_ODD : 0)
};
THD_TABLE_BEGIN
THD_TABLE_THREAD(0, "main", waThread1, Thread1, NULL)
THD_TABLE_END
//static const halclkcfg_t halClockDefault = {
// .pwr_cr1 = PWR_CR1_VOS_0,
// .pwr_cr2 = STM32_PWR_CR2,
// .rcc_cr = RCC_CR_PLLON | RCC_CR_HSION,
// .rcc_cfgr = (6 << RCC_CFGR_PPRE_Pos) | (1 << RCC_CFGR_HPRE_Pos) | RCC_CFGR_SW_PLL,
// .rcc_pllcfgr = (STM32_PLLR_VALUE << RCC_PLLCFGR_PLLR_Pos) | RCC_PLLCFGR_PLLREN |
// (STM32_PLLN_VALUE << RCC_PLLCFGR_PLLN_Pos) |
// ((STM32_PLLM_VALUE - 1) << RCC_PLLCFGR_PLLM_Pos) |
// RCC_PLLCFGR_PLLSRC_HSI,
// .flash_acr = FLASH_ACR_PRFTEN | FLASH_ACR_ICEN | (2 << FLASH_ACR_LATENCY_Pos)
//};
//
//static const halclkcfg_t halClockSleep = {
// .pwr_cr1 = PWR_CR1_VOS_0,
// .pwr_cr2 = STM32_PWR_CR2,
// .rcc_cr = RCC_CR_PLLON | RCC_CR_HSION,
// .rcc_cfgr = (0 << RCC_CFGR_PPRE_Pos) | (10 << RCC_CFGR_HPRE_Pos) | RCC_CFGR_SW_PLL,
// .rcc_pllcfgr = (STM32_PLLR_VALUE << RCC_PLLCFGR_PLLR_Pos) | RCC_PLLCFGR_PLLREN |
// (STM32_PLLN_VALUE << RCC_PLLCFGR_PLLN_Pos) |
// ((STM32_PLLM_VALUE - 1) << RCC_PLLCFGR_PLLM_Pos) |
// RCC_PLLCFGR_PLLSRC_HSI,
// .flash_acr = FLASH_ACR_PRFTEN | FLASH_ACR_ICEN | (2 << FLASH_ACR_LATENCY_Pos)
//};
int main(void)
{
halInit();
chSysInit();
for (;;)
asm("wfi");
}
THD_FUNCTION(Thread1, arg)
{
(void)arg;
chThdSleepMilliseconds(2000);
osalSysEnable();
osalThreadSleepMilliseconds(2000);
palSetPadMode(GPIOB, 7, PAL_MODE_OUTPUT_PUSHPULL);
palSetPadMode(GPIOF, 2, PAL_MODE_UNCONNECTED);
@ -89,16 +99,28 @@ THD_FUNCTION(Thread1, arg)
sdStart(&SD2, NULL);
sdWrite(&SD2, (uint8_t *)"Noisemeter\n", 11);
chThdSleepMilliseconds(100);
osalThreadSleepMilliseconds(2);
i2sStart(&I2SD1, &i2sConfig);
i2sStartExchange(&I2SD1);
i2sReady.store(false);
uint8_t strbuf[7] = { 0, 0, 0, 'd', 'B', '\n', '\0' };
for (;;) {
palSetPad(GPIOB, 7);
chSemWait(&i2sReady);
//if (halClockSwitchMode(&halClockSleep)) {
// sdWrite(&SD2, (uint8_t *)"sleepf\n", 7);
// osalThreadSleepMilliseconds(5000);
//}
while (!i2sReady.load())
asm("wfi");
i2sReady.store(false);
//if (halClockSwitchMode(&halClockDefault)) {
// sdWrite(&SD2, (uint8_t *)"sleepf\n", 7);
// osalThreadSleepMilliseconds(5000);
//}
palSetPad(GPIOB, 7);
const sos_t Leq_RMS = qfp_fsqrt(Leq_sum_sqr / qfp_uint2float(Leq_samples));
const sos_t Leq_dB = MIC_OFFSET_DB + MIC_REF_DB + sos_t(20.f) *
qfp_flog10(Leq_RMS / MIC_REF_AMPL);
@ -110,24 +132,29 @@ THD_FUNCTION(Thread1, arg)
strbuf[1] = n % 10 + '0'; n /= 10;
strbuf[0] = n ? n + '0' : ' ';
sdWrite(&SD2, strbuf, sizeof(strbuf));
osalThreadSleepMilliseconds(2);
palClearPad(GPIOB, 7);
}
}
__attribute__((section(".data")))
int32_t fixsample(uint32_t s) {
return (int32_t)(((s & 0xFFFF) << 16) | (s >> 16)) >> (32 - MIC_BITS);
}
__attribute__((section(".data")))
void i2sCallback(I2SDriver *i2s)
{
//halClockSwitchMode(&halClockDefault);
palSetPad(GPIOB, 7);
const auto halfsize = i2sBuffer.size() / 2;
const auto offset = i2sIsBufferComplete(i2s) ? halfsize : 0;
auto samples = reinterpret_cast<sos_t *>(i2sBuffer.data() + offset);
const auto source = i2sBuffer.data() + (i2sIsBufferComplete(i2s) ? halfsize : 0);
auto samples = reinterpret_cast<sos_t *>(source);
std::ranges::copy(
std::views::counted(i2sBuffer.begin() + offset, halfsize / I2S_STRIDE)
std::views::counted(source, halfsize / I2S_STRIDE)
| std::ranges::views::stride(2)
| std::views::transform([](uint32_t s) { return sos_t(qfp_int2float(fixsample(s))); }),
| std::views::transform([](uint32_t s) { return sos_t(qfp_int2float_asm(fixsample(s))); }),
samples);
auto samps = std::views::counted(samples, halfsize / (2 * I2S_STRIDE));
@ -138,8 +165,10 @@ void i2sCallback(I2SDriver *i2s)
// Wakeup main thread for dB calculation every second
if (Leq_samples >= SAMPLE_RATE / I2S_STRIDE) {
chSemSignalI(&i2sReady);
i2sReady.store(true);
}
palClearPad(GPIOB, 7);
//halClockSwitchMode(&halClockSleep);
}

@ -0,0 +1,68 @@
/*
ChibiOS - Copyright (C) 2006..2018 Giovanni Di Sirio
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/**
* @file templates/halconf.h
* @brief Bare-metal OSAL configuration header.
*
* @addtogroup OSAL_CONF
* @{
*/
#ifndef OSALCONF_H
#define OSALCONF_H
/**
* @brief Frequency in Hertz of the system tick.
*/
#if !defined(OSAL_ST_FREQUENCY) || defined(__DOXYGEN__)
#define OSAL_ST_FREQUENCY 20
#endif
/**
* @brief Enables OSAL assertions.
*/
#if !defined(OSAL_DBG_ENABLE_ASSERTS) || defined(__DOXYGEN__)
#define OSAL_DBG_ENABLE_ASSERTS FALSE
#endif
/**
* @brief Enables OSAL functions parameters checks.
*/
#if !defined(OSAL_DBG_ENABLE_CHECKS) || defined(__DOXYGEN__)
#define OSAL_DBG_ENABLE_CHECKS FALSE
#endif
/**
* @brief OSAL initialization hook.
*/
#if !defined(OSAL_INIT_HOOK) || defined(__DOXYGEN__)
#define OSAL_INIT_HOOK() { \
}
#endif
/**
* @brief Idle loop hook macro.
*/
#if !defined(OSAL_IDLE_HOOK) || defined(__DOXYGEN__)
#define OSAL_IDLE_HOOK() { \
asm("wfi"); \
}
#endif
#endif /* OSALCONF_H */
/** @} */

@ -0,0 +1,381 @@
inline float qfp_fpow(float b, float e)
{
return qfp_fexp(qfp_fmul(e, qfp_fln(b)));
}
inline float qfp_flog10(float x)
{
static const auto ln10 = qfp_fln(10.f);
return qfp_fdiv(qfp_fln(x), ln10);
}
__attribute__((naked, section(".data")))
inline float qfp_fadd_asm(float, float)
{
asm(R"(
.syntax unified
push {r4,r5,r6,r14}
asrs r4,r0,#31
lsls r2,r0,#1
lsrs r2,#24 @ x exponent
beq fa_xe0
cmp r2,#255
beq fa_xe255
fa_xe:
asrs r5,r1,#31
lsls r3,r1,#1
lsrs r3,#24 @ y exponent
beq fa_ye0
cmp r3,#255
beq fa_ye255
fa_ye:
ldr r6,=#0x007fffff
ands r0,r0,r6 @ extract mantissa bits
ands r1,r1,r6
adds r6,#1 @ r6=0x00800000
orrs r0,r0,r6 @ set implied 1
orrs r1,r1,r6
eors r0,r0,r4 @ complement...
eors r1,r1,r5
subs r0,r0,r4 @ ... and add 1 if sign bit is set: 2's complement
subs r1,r1,r5
subs r5,r3,r2 @ ye-xe
subs r4,r2,r3 @ xe-ye
bmi fa_ygtx
@ here xe>=ye
cmp r4,#30
bge fa_xmgty @ xe much greater than ye?
adds r5,#32
movs r3,r2 @ save exponent
@ here y in r1 must be shifted down r4 places to align with x in r0
movs r2,r1
lsls r2,r2,r5 @ keep the bits we will shift off the bottom of r1
asrs r1,r1,r4
b fa_0
.ltorg
fa_ymgtx:
movs r2,#0 @ result is just y
movs r0,r1
b fa_1
fa_xmgty:
movs r3,r2 @ result is just x
movs r2,#0
b fa_1
fa_ygtx:
@ here ye>xe
cmp r5,#30
bge fa_ymgtx @ ye much greater than xe?
adds r4,#32
@ here x in r0 must be shifted down r5 places to align with y in r1
movs r2,r0
lsls r2,r2,r4 @ keep the bits we will shift off the bottom of r0
asrs r0,r0,r5
fa_0:
adds r0,r1 @ result is now in r0:r2, possibly highly denormalised or zero; exponent in r3
beq fa_9 @ if zero, inputs must have been of identical magnitude and opposite sign, so return +0
fa_1:
lsrs r1,r0,#31 @ sign bit
beq fa_8
mvns r0,r0
rsbs r2,r2,#0
bne fa_8
adds r0,#1
fa_8:
adds r6,r6
@ r6=0x01000000
cmp r0,r6
bhs fa_2
fa_3:
adds r2,r2 @ normalisation loop
adcs r0,r0
subs r3,#1 @ adjust exponent
cmp r0,r6
blo fa_3
fa_2:
@ here r0:r2 is the result mantissa 0x01000000<=r0<0x02000000, r3 the exponent, and r1 the sign bit
lsrs r0,#1
bcc fa_4
@ rounding bits here are 1:r2
adds r0,#1 @ round up
cmp r2,#0
beq fa_5 @ sticky bits all zero?
fa_4:
cmp r3,#254
bhs fa_6 @ exponent too large or negative?
lsls r1,#31 @ pack everything
add r0,r1
lsls r3,#23
add r0,r3
fa_end:
pop {r4,r5,r6,r15}
fa_9:
cmp r2,#0 @ result zero?
beq fa_end @ return +0
b fa_1
fa_5:
lsrs r0,#1
lsls r0,#1 @ round to even
b fa_4
fa_6:
bge fa_7
@ underflow
@ can handle denormals here
lsls r0,r1,#31 @ result is signed zero
pop {r4,r5,r6,r15}
fa_7:
@ overflow
lsls r0,r1,#8
adds r0,#255
lsls r0,#23 @ result is signed infinity
pop {r4,r5,r6,r15}
fa_xe0:
@ can handle denormals here
subs r2,#32
adds r2,r4 @ exponent -32 for +Inf, -33 for -Inf
b fa_xe
fa_xe255:
@ can handle NaNs here
lsls r2,#8
add r2,r2,r4 @ exponent ~64k for +Inf, ~64k-1 for -Inf
b fa_xe
fa_ye0:
@ can handle denormals here
subs r3,#32
adds r3,r5 @ exponent -32 for +Inf, -33 for -Inf
b fa_ye
fa_ye255:
@ can handle NaNs here
lsls r3,#8
add r3,r3,r5 @ exponent ~64k for +Inf, ~64k-1 for -Inf
b fa_ye
)");
}
__attribute__((naked, section(".data")))
inline float qfp_fmul_asm(float, float)
{
asm(R"(
.syntax unified
push {r7,r14}
mov r2,r0
eors r2,r1 @ sign of result
lsrs r2,#31
lsls r2,#31
mov r14,r2
lsls r0,#1
lsls r1,#1
lsrs r2,r0,#24 @ xe
beq fm_xe0
cmp r2,#255
beq fm_xe255
fm_xe:
lsrs r3,r1,#24 @ ye
beq fm_ye0
cmp r3,#255
beq fm_ye255
fm_ye:
adds r7,r2,r3 @ exponent of result (will possibly be incremented)
subs r7,#128 @ adjust bias for packing
lsls r0,#8 @ x mantissa
lsls r1,#8 @ y mantissa
lsrs r0,#9
lsrs r1,#9
adds r2,r0,r1 @ for later
mov r12,r2
lsrs r2,r0,#7 @ x[22..7] Q16
lsrs r3,r1,#7 @ y[22..7] Q16
muls r2,r2,r3 @ result [45..14] Q32: never an overestimate and worst case error is 2*(2^7-1)*(2^23-2^7)+(2^7-1)^2 = 2130690049 < 2^31
muls r0,r0,r1 @ result [31..0] Q46
lsrs r2,#18 @ result [45..32] Q14
bcc 1f
cmp r0,#0
bmi 1f
adds r2,#1 @ fix error in r2
1:
lsls r3,r0,#9 @ bits off bottom of result
lsrs r0,#23 @ Q23
lsls r2,#9
adds r0,r2 @ cut'n'shut
add r0,r12 @ implied 1*(x+y) to compensate for no insertion of implied 1s
@ result-1 in r3:r0 Q23+32, i.e., in range [0,3)
lsrs r1,r0,#23
bne fm_0 @ branch if we need to shift down one place
@ here 1<=result<2
cmp r7,#254
bhs fm_3a @ catches both underflow and overflow
lsls r3,#1 @ sticky bits at top of R3, rounding bit in carry
bcc fm_1 @ no rounding
beq fm_2 @ rounding tie?
adds r0,#1 @ round up
fm_1:
adds r7,#1 @ for implied 1
lsls r7,#23 @ pack result
add r0,r7
add r0,r14
pop {r7,r15}
fm_2: @ rounding tie
adds r0,#1
fm_3:
lsrs r0,#1
lsls r0,#1 @ clear bottom bit
b fm_1
@ here 1<=result-1<3
fm_0:
adds r7,#1 @ increment exponent
cmp r7,#254
bhs fm_3b @ catches both underflow and overflow
lsrs r0,#1 @ shift mantissa down
bcc fm_1a @ no rounding
adds r0,#1 @ assume we will round up
cmp r3,#0 @ sticky bits
beq fm_3c @ rounding tie?
fm_1a:
adds r7,r7
adds r7,#1 @ for implied 1
lsls r7,#22 @ pack result
add r0,r7
add r0,r14
pop {r7,r15}
fm_3c:
lsrs r0,#1
lsls r0,#1 @ clear bottom bit
b fm_1a
fm_xe0:
subs r2,#16
fm_xe255:
lsls r2,#8
b fm_xe
fm_ye0:
subs r3,#16
fm_ye255:
lsls r3,#8
b fm_ye
@ here the result is under- or overflowing
fm_3b:
bge fm_4 @ branch on overflow
@ trap case where result is denormal 0x007fffff + 0.5ulp or more
adds r7,#1 @ exponent=-1?
bne fm_5
@ corrected mantissa will be >= 3.FFFFFC (0x1fffffe Q23)
@ so r0 >= 2.FFFFFC (0x17ffffe Q23)
adds r0,#2
lsrs r0,#23
cmp r0,#3
bne fm_5
b fm_6
fm_3a:
bge fm_4 @ branch on overflow
@ trap case where result is denormal 0x007fffff + 0.5ulp or more
adds r7,#1 @ exponent=-1?
bne fm_5
adds r0,#1 @ mantissa=0xffffff (i.e., r0=0x7fffff)?
lsrs r0,#23
beq fm_5
fm_6:
movs r0,#1 @ return smallest normal
lsls r0,#23
add r0,r14
pop {r7,r15}
fm_5:
mov r0,r14
pop {r7,r15}
fm_4:
movs r0,#0xff
lsls r0,#23
add r0,r14
pop {r7,r15}
)");
}
__attribute__((naked, section(".data")))
inline float qfp_int2float_asm(int)
{
asm(R"(
.syntax unified
movs r1,#0 @ fall through
push {r4,r5,r6,r14}
movs r2,#29
subs r2,r1 @ fix exponent
movs r5,#0
bl qfp_int2float_packx
pop {r4,r5,r6,r15}
qfp_int2float_packx:
lsrs r4,r0,#31 @ save sign bit
lsls r4,r4,#31 @ sign now in b31
bpl 2f @ skip if positive
cmp r5,#0
beq 11f
adds r0,#1 @ fiddle carry in to following rsb if sticky bits are non-zero
11:
rsbs r0,#0 @ can now treat r0 as unsigned
bmi 3f @ catch r0=0x80000000 case
2:
subs r2,#1 @ normalisation loop
adds r0,r0
beq 1f @ zero? special case
bpl 2b @ normalise so leading "1" in bit 31
3:
adds r2,#129 @ (mis-)offset exponent
bne 12f @ special case: highest denormal can round to lowest normal
adds r0,#0x80 @ in special case, need to add 256 to r0 for rounding
bcs 4f @ tripped carry? then have leading 1 in C as required
12:
adds r0,#0x80 @ rounding
bcs 4f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)
cmp r5,#0
beq 7f @ sticky bits zero?
8:
lsls r0,#1 @ remove leading 1
9:
subs r2,#1 @ compensate exponent on this path
4:
cmp r2,#254
bge 5f @ overflow?
adds r2,#1 @ correct exponent offset
ble 10f @ denormal/underflow?
lsrs r0,#9 @ align mantissa
lsls r2,#23 @ align exponent
orrs r0,r2 @ assemble exponent and mantissa
6:
orrs r0,r4 @ apply sign
1:
bx r14
5:
movs r0,#0xff @ create infinity
lsls r0,#23
b 6b
10:
movs r0,#0 @ create zero
bx r14
7: @ sticky bit rounding case
lsls r5,r0,#24 @ check bottom 8 bits of r0
bne 8b @ in rounding-tie case?
lsrs r0,#9 @ ensure even result
lsls r0,#10
b 9b
)");
}

@ -28,17 +28,7 @@
extern "C" {
#include <qfplib-m0-full.h>
}
float qfp_fpow(float b, float e)
{
return qfp_fexp(qfp_fmul(e, qfp_fln(b)));
}
float qfp_flog10(float x)
{
static const auto ln10 = qfp_fln(10.f);
return qfp_fdiv(qfp_fln(x), ln10);
#include "qfplib-port.h"
}
class sos_t
@ -49,7 +39,7 @@ public:
constexpr sos_t(float v_ = 0.f): v(v_) {}
sos_t operator+(auto x) const noexcept {
return qfp_fadd(v, x);
return qfp_fadd_asm(v, x);
}
sos_t operator-(const sos_t& o) const noexcept {
@ -57,7 +47,7 @@ public:
}
sos_t operator*(auto x) const noexcept {
return qfp_fmul(v, x);
return qfp_fmul_asm(v, x);
}
sos_t operator/(auto x) const noexcept {

Loading…
Cancel
Save