From 1b176cf6cd75c8031a140961655cdd3c16589a68 Mon Sep 17 00:00:00 2001 From: Clyne Sullivan <clyne@bitgloo.com> Date: Sun, 9 Jan 2022 12:28:19 -0500 Subject: small changes; sig gen square(), triangle(), pulse() --- examples/4_fir_pro.cpp | 478 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 478 insertions(+) create mode 100644 examples/4_fir_pro.cpp (limited to 'examples/4_fir_pro.cpp') diff --git a/examples/4_fir_pro.cpp b/examples/4_fir_pro.cpp new file mode 100644 index 0000000..b1a6832 --- /dev/null +++ b/examples/4_fir_pro.cpp @@ -0,0 +1,478 @@ +#include <cstdint> +using float32_t = float; + +typedef struct +{ + uint16_t numTaps; /**< number of filter coefficients in the filter. */ + float32_t *pState; /**< points to the state variable array. The array is of length numTaps+blockSize-1. */ + float32_t *pCoeffs; /**< points to the coefficient array. The array is of length numTaps. */ +} arm_fir_instance_f32; + +static void arm_fir_f32(const arm_fir_instance_f32 * S, float32_t * pSrc, float32_t * pDst, uint32_t blockSize); + +Sample *process_data(Samples samples) +{ + // 1. Define our array sizes (Be sure to set Run > Set buffer size... to below value!) + constexpr unsigned int buffer_size = 500; + constexpr unsigned int filter_size = 100; + + // 2. 
Define our filter and the working arrays + static float filter[filter_size] = { + .01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f, + .01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f, + .01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f, + .01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f, + .01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f, + .01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f, + .01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f, + .01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f, + .01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f, + .01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f,.01f + }; + static float input[buffer_size]; + static float output[buffer_size]; + static float working[buffer_size + filter_size]; + + // 3. Scale 0-4095 interger sample values to +/- 1.0 floats + for (unsigned int i = 0; i < samples.size(); i++) + input[i] = (samples[i] - 2048) / 2048.f; + + // 4. Compute the FIR + arm_fir_instance_f32 fir { filter_size, working, filter }; + arm_fir_f32(&fir, input, output, samples.size()); + + // 5. 
Convert float results back to 0-4095 range for output + for (unsigned int i = 0; i < samples.size(); i++) + samples[i] = output[i] * 2048.f + 2048; + + return samples.data(); +} + +// Below taken from the CMSIS DSP Library (find it on GitHub) +void arm_fir_f32( + const arm_fir_instance_f32 * S, + float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + float32_t *pState = S->pState; /* State pointer */ + float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + float32_t *pStateCurnt; /* Points to the current sample of the state */ + float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ + float32_t acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; /* Accumulators */ + float32_t x0, x1, x2, x3, x4, x5, x6, x7, c0; /* Temporary variables to hold state and coefficient values */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i, tapCnt, blkCnt; /* Loop counters */ + float32_t p0,p1,p2,p3,p4,p5,p6,p7; /* Temporary product values */ + + /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1u)]); + + /* Apply loop unrolling and compute 8 output values simultaneously. + * The variables acc0 ... acc7 hold output values that are being computed: + * + * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] + * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] + * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] + * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] + */ + blkCnt = blockSize >> 3; + + /* First part of the processing with loop unrolling. 
Compute 8 outputs at a time. + ** a second loop below computes the remaining 1 to 7 samples. */ + while(blkCnt > 0u) + { + /* Copy four new input samples into the state buffer */ + *pStateCurnt++ = *pSrc++; + *pStateCurnt++ = *pSrc++; + *pStateCurnt++ = *pSrc++; + *pStateCurnt++ = *pSrc++; + + /* Set all accumulators to zero */ + acc0 = 0.0f; + acc1 = 0.0f; + acc2 = 0.0f; + acc3 = 0.0f; + acc4 = 0.0f; + acc5 = 0.0f; + acc6 = 0.0f; + acc7 = 0.0f; + + /* Initialize state pointer */ + px = pState; + + /* Initialize coeff pointer */ + pb = (pCoeffs); + + /* Copy four more input samples into the state buffer. This is separated from the others to avoid + * a call to __aeabi_memmove which would be slower + */ + *pStateCurnt++ = *pSrc++; + *pStateCurnt++ = *pSrc++; + *pStateCurnt++ = *pSrc++; + *pStateCurnt++ = *pSrc++; + + /* Read the first seven samples from the state buffer: x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */ + x0 = *px++; + x1 = *px++; + x2 = *px++; + x3 = *px++; + x4 = *px++; + x5 = *px++; + x6 = *px++; + + /* Loop unrolling. Process 8 taps at a time. */ + tapCnt = numTaps >> 3u; + + /* Loop over the number of taps. Unroll by a factor of 8. + ** Repeat until we've computed numTaps-8 coefficients. 
*/ + while(tapCnt > 0u) + { + /* Read the b[numTaps-1] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-3] sample */ + x7 = *(px++); + + /* acc0 += b[numTaps-1] * x[n-numTaps] */ + p0 = x0 * c0; + + /* acc1 += b[numTaps-1] * x[n-numTaps-1] */ + p1 = x1 * c0; + + /* acc2 += b[numTaps-1] * x[n-numTaps-2] */ + p2 = x2 * c0; + + /* acc3 += b[numTaps-1] * x[n-numTaps-3] */ + p3 = x3 * c0; + + /* acc4 += b[numTaps-1] * x[n-numTaps-4] */ + p4 = x4 * c0; + + /* acc5 += b[numTaps-1] * x[n-numTaps-5] */ + p5 = x5 * c0; + + /* acc6 += b[numTaps-1] * x[n-numTaps-6] */ + p6 = x6 * c0; + + /* acc7 += b[numTaps-1] * x[n-numTaps-7] */ + p7 = x7 * c0; + + /* Read the b[numTaps-2] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-4] sample */ + x0 = *(px++); + + acc0 += p0; + acc1 += p1; + acc2 += p2; + acc3 += p3; + acc4 += p4; + acc5 += p5; + acc6 += p6; + acc7 += p7; + + + /* Perform the multiply-accumulate */ + p0 = x1 * c0; + p1 = x2 * c0; + p2 = x3 * c0; + p3 = x4 * c0; + p4 = x5 * c0; + p5 = x6 * c0; + p6 = x7 * c0; + p7 = x0 * c0; + + /* Read the b[numTaps-3] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-5] sample */ + x1 = *(px++); + + acc0 += p0; + acc1 += p1; + acc2 += p2; + acc3 += p3; + acc4 += p4; + acc5 += p5; + acc6 += p6; + acc7 += p7; + + /* Perform the multiply-accumulates */ + p0 = x2 * c0; + p1 = x3 * c0; + p2 = x4 * c0; + p3 = x5 * c0; + p4 = x6 * c0; + p5 = x7 * c0; + p6 = x0 * c0; + p7 = x1 * c0; + + /* Read the b[numTaps-4] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-6] sample */ + x2 = *(px++); + + acc0 += p0; + acc1 += p1; + acc2 += p2; + acc3 += p3; + acc4 += p4; + acc5 += p5; + acc6 += p6; + acc7 += p7; + + /* Perform the multiply-accumulates */ + p0 = x3 * c0; + p1 = x4 * c0; + p2 = x5 * c0; + p3 = x6 * c0; + p4 = x7 * c0; + p5 = x0 * c0; + p6 = x1 * c0; + p7 = x2 * c0; + + /* Read the b[numTaps-5] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-7] sample */ + x3 = *(px++); + + acc0 += p0; + acc1 += p1; + acc2 += 
p2; + acc3 += p3; + acc4 += p4; + acc5 += p5; + acc6 += p6; + acc7 += p7; + + /* Perform the multiply-accumulates */ + p0 = x4 * c0; + p1 = x5 * c0; + p2 = x6 * c0; + p3 = x7 * c0; + p4 = x0 * c0; + p5 = x1 * c0; + p6 = x2 * c0; + p7 = x3 * c0; + + /* Read the b[numTaps-6] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-8] sample */ + x4 = *(px++); + + acc0 += p0; + acc1 += p1; + acc2 += p2; + acc3 += p3; + acc4 += p4; + acc5 += p5; + acc6 += p6; + acc7 += p7; + + /* Perform the multiply-accumulates */ + p0 = x5 * c0; + p1 = x6 * c0; + p2 = x7 * c0; + p3 = x0 * c0; + p4 = x1 * c0; + p5 = x2 * c0; + p6 = x3 * c0; + p7 = x4 * c0; + + /* Read the b[numTaps-7] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-9] sample */ + x5 = *(px++); + + acc0 += p0; + acc1 += p1; + acc2 += p2; + acc3 += p3; + acc4 += p4; + acc5 += p5; + acc6 += p6; + acc7 += p7; + + /* Perform the multiply-accumulates */ + p0 = x6 * c0; + p1 = x7 * c0; + p2 = x0 * c0; + p3 = x1 * c0; + p4 = x2 * c0; + p5 = x3 * c0; + p6 = x4 * c0; + p7 = x5 * c0; + + /* Read the b[numTaps-8] coefficient */ + c0 = *(pb++); + + /* Read x[n-numTaps-10] sample */ + x6 = *(px++); + + acc0 += p0; + acc1 += p1; + acc2 += p2; + acc3 += p3; + acc4 += p4; + acc5 += p5; + acc6 += p6; + acc7 += p7; + + /* Perform the multiply-accumulates */ + p0 = x7 * c0; + p1 = x0 * c0; + p2 = x1 * c0; + p3 = x2 * c0; + p4 = x3 * c0; + p5 = x4 * c0; + p6 = x5 * c0; + p7 = x6 * c0; + + tapCnt--; + + acc0 += p0; + acc1 += p1; + acc2 += p2; + acc3 += p3; + acc4 += p4; + acc5 += p5; + acc6 += p6; + acc7 += p7; + } + + /* If the filter length is not a multiple of 8, compute the remaining filter taps */ + tapCnt = numTaps % 0x8u; + + while(tapCnt > 0u) + { + /* Read coefficients */ + c0 = *(pb++); + + /* Fetch 1 state variable */ + x7 = *(px++); + + /* Perform the multiply-accumulates */ + p0 = x0 * c0; + p1 = x1 * c0; + p2 = x2 * c0; + p3 = x3 * c0; + p4 = x4 * c0; + p5 = x5 * c0; + p6 = x6 * c0; + p7 = x7 * c0; + + /* Reuse the 
present sample states for next sample */ + x0 = x1; + x1 = x2; + x2 = x3; + x3 = x4; + x4 = x5; + x5 = x6; + x6 = x7; + + acc0 += p0; + acc1 += p1; + acc2 += p2; + acc3 += p3; + acc4 += p4; + acc5 += p5; + acc6 += p6; + acc7 += p7; + + /* Decrement the loop counter */ + tapCnt--; + } + + /* Advance the state pointer by 8 to process the next group of 8 samples */ + pState = pState + 8; + + /* Store the results from the 8 accumulators in the destination buffer. */ + *pDst++ = acc0; + *pDst++ = acc1; + *pDst++ = acc2; + *pDst++ = acc3; + *pDst++ = acc4; + *pDst++ = acc5; + *pDst++ = acc6; + *pDst++ = acc7; + + blkCnt--; + } + + /* If the blockSize is not a multiple of 8, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x8u; + + while(blkCnt > 0u) + { + /* Copy one sample at a time into state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Set the accumulator to zero */ + acc0 = 0.0f; + + /* Initialize state pointer */ + px = pState; + + /* Initialize Coefficient pointer */ + pb = (pCoeffs); + + i = numTaps; + + /* Perform the multiply-accumulates */ + do + { + acc0 += *px++ * *pb++; + i--; + + } while(i > 0u); + + /* The result is stored in the destination buffer. */ + *pDst++ = acc0; + + /* Advance state pointer by 1 for the next sample */ + pState = pState + 1; + + blkCnt--; + } + + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the start of the state buffer. + ** This prepares the state buffer for the next function call. 
*/ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + tapCnt = (numTaps - 1u) >> 2u; + + /* copy data */ + while(tapCnt > 0u) + { + *pStateCurnt++ = *pState++; + *pStateCurnt++ = *pState++; + *pStateCurnt++ = *pState++; + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + tapCnt--; + } + + /* Calculate remaining number of copies */ + tapCnt = (numTaps - 1u) % 0x4u; + + /* Copy the remaining float32_t data */ + while(tapCnt > 0u) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + tapCnt--; + } +} -- cgit v1.2.3 From f211f9628854b417000192c59d6ab22b946119b1 Mon Sep 17 00:00:00 2001 From: Clyne Sullivan <clyne@bitgloo.com> Date: Sun, 22 May 2022 13:37:49 -0400 Subject: make helper funcs inline; drop std::span for algo --- examples/1_convolve_simple.cpp | 9 +++--- examples/2_convolve_overlap_save.cpp | 8 ++--- examples/3_fir.cpp | 8 ++--- examples/4_fir_pro.cpp | 10 +++---- examples/5_fir_differentiator.cpp | 8 ++--- examples/6_iir_test.cpp | 18 ++++++++--- examples/7_iir_echo.cpp | 16 +++++++--- source/stmdsp/stmdsp_code.hpp | 58 ++++++++++++++++++------------------ 8 files changed, 77 insertions(+), 58 deletions(-) (limited to 'examples/4_fir_pro.cpp') diff --git a/examples/1_convolve_simple.cpp b/examples/1_convolve_simple.cpp index 8de05d3..95877f1 100644 --- a/examples/1_convolve_simple.cpp +++ b/examples/1_convolve_simple.cpp @@ -7,10 +7,10 @@ * transient response is not calculated. */ -Sample *process_data(Samples samples) +Sample* process_data(Samples samples) { - // Define our output buffer. SIZE is the largest size of the 'samples' buffer. - static Sample buffer[samples.size()]; + // Define our output buffer. + static Samples buffer; // Define our filter constexpr unsigned int filter_size = 3; @@ -19,7 +19,8 @@ Sample *process_data(Samples samples) }; // Begin convolving: - for (int n = 0; n < samples.size() - (filter_size - 1); n++) { + // SIZE is the size of the sample buffer. 
+ for (int n = 0; n < SIZE - (filter_size - 1); n++) { buffer[n] = 0; for (int k = 0; k < filter_size; k++) buffer[n] += samples[n + k] * filter[k]; diff --git a/examples/2_convolve_overlap_save.cpp b/examples/2_convolve_overlap_save.cpp index 57c020a..5651f3e 100644 --- a/examples/2_convolve_overlap_save.cpp +++ b/examples/2_convolve_overlap_save.cpp @@ -11,9 +11,9 @@ * computation. */ -Sample *process_data(Samples samples) +Sample* process_data(Samples samples) { - static Sample buffer[samples.size()]; + static Samples buffer; constexpr unsigned int filter_size = 3; float filter[filter_size] = { @@ -23,7 +23,7 @@ Sample *process_data(Samples samples) // Keep a buffer of extra samples for overlap-save static Sample prev[filter_size]; - for (int n = 0; n < samples.size(); n++) { + for (int n = 0; n < SIZE; n++) { buffer[n] = 0; for (int k = 0; k < filter_size; k++) { @@ -40,7 +40,7 @@ Sample *process_data(Samples samples) // Save samples for the next convolution run for (int i = 0; i < filter_size; i++) - prev[i] = samples[samples.size() - filter_size + i]; + prev[i] = samples[SIZE - filter_size + i]; return buffer; } diff --git a/examples/3_fir.cpp b/examples/3_fir.cpp index 3a68500..b6d8751 100644 --- a/examples/3_fir.cpp +++ b/examples/3_fir.cpp @@ -7,9 +7,9 @@ * within the available execution time. Samples are also normalized so that they center around zero. 
*/ -Sample *process_data(Samples samples) +Sample* process_data(Samples samples) { - static Sample buffer[samples.size()]; + static Samples buffer; // Define the filter: constexpr unsigned int filter_size = 3; @@ -21,7 +21,7 @@ Sample *process_data(Samples samples) // Do an overlap-save convolution static Sample prev[filter_size]; - for (int n = 0; n < samples.size(); n++) { + for (int n = 0; n < SIZE; n++) { // Using a float variable for accumulation allows for better code optimization float v = 0; @@ -40,7 +40,7 @@ Sample *process_data(Samples samples) // Save samples for next convolution for (int i = 0; i < filter_size; i++) - prev[i] = samples[samples.size() - filter_size + i]; + prev[i] = samples[SIZE - filter_size + i]; return buffer; } diff --git a/examples/4_fir_pro.cpp b/examples/4_fir_pro.cpp index b1a6832..1771cd5 100644 --- a/examples/4_fir_pro.cpp +++ b/examples/4_fir_pro.cpp @@ -10,7 +10,7 @@ typedef struct static void arm_fir_f32(const arm_fir_instance_f32 * S, float32_t * pSrc, float32_t * pDst, uint32_t blockSize); -Sample *process_data(Samples samples) +Sample* process_data(Samples samples) { // 1. Define our array sizes (Be sure to set Run > Set buffer size... to below value!) constexpr unsigned int buffer_size = 500; @@ -34,18 +34,18 @@ Sample *process_data(Samples samples) static float working[buffer_size + filter_size]; // 3. Scale 0-4095 interger sample values to +/- 1.0 floats - for (unsigned int i = 0; i < samples.size(); i++) + for (unsigned int i = 0; i < SIZE; i++) input[i] = (samples[i] - 2048) / 2048.f; // 4. Compute the FIR arm_fir_instance_f32 fir { filter_size, working, filter }; - arm_fir_f32(&fir, input, output, samples.size()); + arm_fir_f32(&fir, input, output, SIZE); // 5. 
Convert float results back to 0-4095 range for output - for (unsigned int i = 0; i < samples.size(); i++) + for (unsigned int i = 0; i < SIZE; i++) samples[i] = output[i] * 2048.f + 2048; - return samples.data(); + return samples; } // Below taken from the CMSIS DSP Library (find it on GitHub) diff --git a/examples/5_fir_differentiator.cpp b/examples/5_fir_differentiator.cpp index 72415c6..1500dee 100644 --- a/examples/5_fir_differentiator.cpp +++ b/examples/5_fir_differentiator.cpp @@ -7,23 +7,23 @@ * A scaling factor is applied so that the output's form is more clearly visible. */ -Sample *process_data(Samples samples) +Sample* process_data(Samples samples) { constexpr int scaling_factor = 4; - static Sample output[samples.size()]; + static Samples output; static Sample prev = 2048; // Compute the first output value using the saved sample. output[0] = 2048 + ((samples[0] - prev) * scaling_factor); - for (unsigned int i = 1; i < samples.size(); i++) { + for (unsigned int i = 1; i < SIZE; i++) { // Take the rate of change and scale it. // 2048 is added as the output should be centered in the voltage range. output[i] = 2048 + ((samples[i] - samples[i - 1]) * scaling_factor); } // Save the last sample for the next iteration. - prev = samples[samples.size() - 1]; + prev = samples[SIZE - 1]; return output; } diff --git a/examples/6_iir_test.cpp b/examples/6_iir_test.cpp index 116a680..e0b266d 100644 --- a/examples/6_iir_test.cpp +++ b/examples/6_iir_test.cpp @@ -1,13 +1,23 @@ -Sample *process_data(Samples samples) +/** + * 6_iir_test.cpp + * Written by Clyne Sullivan. + * + * Implements a simple infinite impulse response (IIR) filter using an alpha + * parameter. 
+ * To build upon this example, try setting `alpha` with a parameter knob: + * alpha = param1() / 4095.0 + */ + +Sample* process_data(Samples samples) { constexpr float alpha = 0.7; static Sample prev = 2048; samples[0] = (1 - alpha) * samples[0] + alpha * prev; - for (unsigned int i = 1; i < samples.size(); i++) + for (unsigned int i = 1; i < SIZE; i++) samples[i] = (1 - alpha) * samples[i] + alpha * samples[i - 1]; - prev = samples[samples.size() - 1]; + prev = samples[SIZE - 1]; - return samples.data(); + return samples; } diff --git a/examples/7_iir_echo.cpp b/examples/7_iir_echo.cpp index 57e5605..75bf56e 100644 --- a/examples/7_iir_echo.cpp +++ b/examples/7_iir_echo.cpp @@ -1,9 +1,17 @@ -Sample *process_data(Samples samples) +/** + * 7_iir_echo.cpp + * Written by Clyne Sullivan. + * + * This filter produces an echo of the given input. There are two parameters: + * alpha controls the feedback gain, and D controls the echo/delay length. + */ + +Sample* process_data(Samples samples) { constexpr float alpha = 0.75; constexpr unsigned int D = 100; - static Sample output[samples.size()]; + static Samples output; static Sample prev[D]; // prev[0] = output[0 - D] // Do calculations with previous output @@ -11,12 +19,12 @@ Sample *process_data(Samples samples) output[i] = samples[i] + alpha * (prev[i] - 2048); // Do calculations with current samples - for (unsigned int i = D; i < samples.size(); i++) + for (unsigned int i = D; i < SIZE; i++) output[i] = samples[i] + alpha * (output[i - D] - 2048); // Save outputs for next computation for (unsigned int i = 0; i < D; i++) - prev[i] = output[samples.size() - (D - i)]; + prev[i] = output[SIZE - (D - i)]; return output; } diff --git a/source/stmdsp/stmdsp_code.hpp b/source/stmdsp/stmdsp_code.hpp index 6850459..7ba0ed2 100644 --- a/source/stmdsp/stmdsp_code.hpp +++ b/source/stmdsp/stmdsp_code.hpp @@ -118,67 +118,67 @@ return s; )cpp"; static std::string file_header_l4 = R"cpp( #include <cstdint> -#include <span> using 
Sample = uint16_t; -using Samples = std::span<Sample, $0>; +using Samples = Sample[$0]; +constexpr unsigned int SIZE = $0; Sample *process_data(Samples samples); extern "C" void process_data_entry() { Sample *samples; asm("mov %0, r0" : "=r" (samples)); - process_data(Samples(samples, $0)); + process_data(samples); } -static float PI = 3.14159265358979L; +static inline float PI = 3.14159265358979L; __attribute__((naked)) -auto sin(float x) { -asm("vmov.f32 r1, s0;" +static inline auto sin(float x) { + asm("vmov.f32 r1, s0;" "eor r0, r0;" "svc 1;" "vmov.f32 s0, r1;" "bx lr"); -return 0; + return 0; } __attribute__((naked)) -auto cos(float x) { -asm("vmov.f32 r1, s0;" +static inline auto cos(float x) { + asm("vmov.f32 r1, s0;" "mov r0, #1;" "svc 1;" "vmov.f32 s0, r1;" "bx lr"); -return 0; + return 0; } __attribute__((naked)) -auto tan(float x) { -asm("vmov.f32 r1, s0;" +static inline auto tan(float x) { + asm("vmov.f32 r1, s0;" "mov r0, #2;" "svc 1;" "vmov.f32 s0, r1;" "bx lr"); -return 0; + return 0; } __attribute__((naked)) -auto sqrt(float) { -asm("vsqrt.f32 s0, s0; bx lr"); -return 0; +static inline auto sqrt(float) { + asm("vsqrt.f32 s0, s0; bx lr"); + return 0; } -auto readpot1() { -Sample s; -asm("push {r4-r11}; eor r0, r0; svc 3; mov %0, r0; pop {r4-r11}" : "=r"(s)); -return s; +static inline auto param1() { + Sample s; + asm("eor r0, r0; svc 3; mov %0, r0" : "=r" (s) :: "r0"); + return s; } -auto readpot2() { -Sample s; -asm("push {r4-r11}; mov r0, #1; svc 3; mov %0, r0; pop {r4-r11}" : "=r"(s)); -return s; +static inline auto param2() { + Sample s; + asm("mov r0, #1; svc 3; mov %0, r0" : "=r" (s) :: "r0"); + return s; } -//void puts(const char *s) { -// 's' will already be in r0. -//asm("push {r4-r6}; svc 4; pop {r4-r6}"); +//static inline void puts(const char *s) { +// // 's' will already be in r0. 
+// asm("push {r4-r6}; svc 4; pop {r4-r6}"); //} // End stmdspgui header code @@ -187,9 +187,9 @@ return s; static std::string file_content = -R"cpp(Sample *process_data(Samples samples) +R"cpp(Sample* process_data(Samples samples) { - return samples.data(); + return samples; } )cpp"; -- cgit v1.2.3