initial upload
parent
3541f3df14
commit
abaea884db
@ -0,0 +1,2 @@
|
|||||||
|
all: main.cpp
|
||||||
|
g++ main.cpp -std=c++20 -lSDL2 -lpthread -lOpenCL -g3 -ggdb -O0
|
@ -1,2 +1,20 @@
|
|||||||
# happy-fractal
|
# happy-fractal
|
||||||
A study of efficient and precise fractal rendering.
|
|
||||||
|
This is a casual project investigating Mandelbrot rendering. It began with the task of rendering the fractal, then achieving a smooth framerate (60+ FPS); now, I am substituting custom data types to allow for deeper zooms.
|
||||||
|
|
||||||
|
This project is written in modern C++, using SDL2 for rendering. OpenCL has been adopted for GPU-accelerated rendering.
|
||||||
|
|
||||||
|
The [r128](https://github.com/fahickman/r128) library was modified to provide a Q4.124 fixed-point data type. Rendering is slow, but more precise than native `double`. With Q4.124, we can get very close to the precision of [XaoS](https://xaos-project.github.io/), which I believe uses [80-bit extended-precision floating-point](https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format).
|
||||||
|
|
||||||
|
In the future, this may either see optimization for faster and smoother Q4.124 rendering, or a further increase in precision.
|
||||||
|
|
||||||
|
## Controls, notes
|
||||||
|
|
||||||
|
Point your mouse to where you would like to zoom from. Left click to zoom in, right click to zoom out.
|
||||||
|
|
||||||
|
The scroll wheel adjusts the zoom speed.
|
||||||
|
|
||||||
|
The source code has a BENCHMARK flag, which times an automated zoom to a given point.
|
||||||
|
|
||||||
|
To switch the source code between `double` and Q4.124, change the `Float` type accordingly and set the appropriate OpenCL kernel (in `main()`, add or remove the `_r128` suffix).
|
||||||
|
|
||||||
|
@ -0,0 +1,509 @@
|
|||||||
|
/**
|
||||||
|
* happy-fractal - A study of efficient and precise fractal rendering.
|
||||||
|
* Copyright (C) 2022 Clyne Sullivan
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// If defined, program auto-zooms and measures runtime.
|
||||||
|
//#define BENCHMARK
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <chrono>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <iostream>
|
||||||
|
#include <memory>
|
||||||
|
#include <sstream>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#define CL_HPP_TARGET_OPENCL_VERSION (300)
|
||||||
|
#define CL_HPP_ENABLE_EXCEPTIONS (1)
|
||||||
|
#include <CL/opencl.hpp>
|
||||||
|
#include <SDL2/SDL.h>
|
||||||
|
|
||||||
|
// The "Float" type determines what data type will store numbers for calculations.
|
||||||
|
// Can use native float or double; or, a custom Q4.124 fixed-point data type.
|
||||||
|
// If fixed point, use "*_r128.c" OpenCL kernel. Otherwise, use regular kernel.
|
||||||
|
|
||||||
|
#define R128_IMPLEMENTATION
|
||||||
|
#include "r128.h"
|
||||||
|
using Float = R128;
|
||||||
|
|
||||||
|
//using Float = double;
|
||||||
|
|
||||||
|
// Sets the window's dimensions. The window is square.
|
||||||
|
constexpr static int WIN_DIM = 800;
|
||||||
|
|
||||||
|
// Not allowed to calculate less iterations than this.
|
||||||
|
constexpr uint32_t MIN_MAX_ITERATIONS = 70;
|
||||||
|
// Not allowed to zoom out farther than this.
|
||||||
|
static const Float MIN_ZOOM (4.0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A packed Float-pair for storing complex numbers.
|
||||||
|
* Must match a vector type for OpenCL.
|
||||||
|
*/
|
||||||
|
struct Complex {
|
||||||
|
Float real = Float(0);
|
||||||
|
Float imag = Float(0);
|
||||||
|
} __attribute__ ((packed));
|
||||||
|
|
||||||
|
class MandelbrotState
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Initializes data and spawns the calculation thread.
|
||||||
|
MandelbrotState();
|
||||||
|
// Joins threads.
|
||||||
|
~MandelbrotState();
|
||||||
|
|
||||||
|
// Prepares to use the given OpenCL kernel for calculations.
|
||||||
|
void initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname);
|
||||||
|
|
||||||
|
Float zoom() const;
|
||||||
|
|
||||||
|
// Offsets the view's origin by the given Complex, and changes zoom by the given factor.
|
||||||
|
// Returns true if a new calculation has been scheduled (false if one is in progress).
|
||||||
|
bool moveOriginAndZoomBy(Complex c, Float z);
|
||||||
|
|
||||||
|
// Outputs the results of the latest calculation into the given SDL texture.
|
||||||
|
// Returns true if successful.
|
||||||
|
// Returns false if a calculation is in progress. The texture is not updated.
|
||||||
|
bool intoTexture(SDL_Texture *texture);
|
||||||
|
// Requests the initiation of a new calculation.
|
||||||
|
void scheduleRecalculation();
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::thread m_calc_thread;
|
||||||
|
std::atomic_bool m_calcing; // If false, we're ready for a new calculation.
|
||||||
|
std::atomic_flag m_recalc; // Tell calcThread to recalc, or tell main thread recalcing is done.
|
||||||
|
uint32_t m_max_iterations;
|
||||||
|
Float m_zoom;
|
||||||
|
Complex m_origin;
|
||||||
|
|
||||||
|
std::unique_ptr<cl::Kernel> m_cl_kernel;
|
||||||
|
std::unique_ptr<cl::CommandQueue> m_cl_queue;
|
||||||
|
std::unique_ptr<cl::Buffer> m_cl_input;
|
||||||
|
std::unique_ptr<cl::Buffer> m_cl_output;
|
||||||
|
|
||||||
|
// Enters main loop of calcThread.
|
||||||
|
void calcThread();
|
||||||
|
// Calls the OpenCL kernel to compute new results.
|
||||||
|
void calculateBitmap();
|
||||||
|
|
||||||
|
// Determine the max iteration count based on zoom factor.
|
||||||
|
static uint32_t calculateMaxIterations(Float zoom);
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool done = false;
|
||||||
|
static std::atomic_int fps = 0;
|
||||||
|
static std::chrono::time_point<std::chrono::high_resolution_clock> clTime;
|
||||||
|
|
||||||
|
static cl::Context initCLContext();
|
||||||
|
static cl::Program initCLProgram(cl::Context&, const char * const);
|
||||||
|
static void initSDL(SDL_Window **, SDL_Renderer **, SDL_Texture **);
|
||||||
|
static void threadFpsMonitor(MandelbrotState&);
|
||||||
|
static void threadEventMonitor(MandelbrotState&);
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
MandelbrotState Mandelbrot;
|
||||||
|
SDL_Window *window;
|
||||||
|
SDL_Renderer *renderer;
|
||||||
|
SDL_Texture *MandelbrotTexture;
|
||||||
|
|
||||||
|
initSDL(&window, &renderer, &MandelbrotTexture);
|
||||||
|
|
||||||
|
std::ifstream clSource ("opencl/mandelbrot_calc_r128.c");
|
||||||
|
if (!clSource.good())
|
||||||
|
throw std::runtime_error("Failed to open OpenCL kernel!");
|
||||||
|
|
||||||
|
// Dump OpenCL kernel into a std::string.
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << clSource.rdbuf();
|
||||||
|
std::string clSourceStr (oss.str());
|
||||||
|
|
||||||
|
auto clContext = initCLContext();
|
||||||
|
auto clProgram = initCLProgram(clContext, clSourceStr.data());
|
||||||
|
Mandelbrot.initKernel(clContext, clProgram, "mandelbrot_calc");
|
||||||
|
|
||||||
|
// Initiate first calculation so something appears on the screen.
|
||||||
|
Mandelbrot.scheduleRecalculation();
|
||||||
|
|
||||||
|
std::thread fpsMonitor ([&Mandelbrot] { threadFpsMonitor(Mandelbrot); });
|
||||||
|
std::thread eventMonitor ([&Mandelbrot] { threadEventMonitor(Mandelbrot); });
|
||||||
|
|
||||||
|
#ifdef BENCHMARK
|
||||||
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
while (!done) {
|
||||||
|
if (Mandelbrot.intoTexture(MandelbrotTexture)) {
|
||||||
|
SDL_RenderClear(renderer);
|
||||||
|
SDL_RenderCopy(renderer, MandelbrotTexture, nullptr, nullptr);
|
||||||
|
SDL_RenderPresent(renderer);
|
||||||
|
|
||||||
|
++fps;
|
||||||
|
} else {
|
||||||
|
std::this_thread::sleep_for(std::chrono::microseconds(10));
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef BENCHMARK
|
||||||
|
if (Mandelbrot.zoom() < Float(1e-5))
|
||||||
|
done = true;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef BENCHMARK
|
||||||
|
std::chrono::duration<double> seconds = std::chrono::high_resolution_clock::now() - start;
|
||||||
|
std::cout << "Calculations took: " << seconds.count() << "s" << std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
eventMonitor.join();
|
||||||
|
fpsMonitor.join();
|
||||||
|
SDL_DestroyRenderer(renderer);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static cl::Platform clplatform;
|
||||||
|
static std::vector<cl::Device> cldevices;
|
||||||
|
|
||||||
|
cl::Context initCLContext()
|
||||||
|
{
|
||||||
|
clplatform = cl::Platform::getDefault();
|
||||||
|
clplatform.getDevices(CL_DEVICE_TYPE_GPU, &cldevices);
|
||||||
|
|
||||||
|
return cl::Context(cldevices.front());
|
||||||
|
}
|
||||||
|
|
||||||
|
cl::Program initCLProgram(cl::Context& clcontext, const char * const source)
|
||||||
|
{
|
||||||
|
cl::Program *prog;
|
||||||
|
|
||||||
|
try {
|
||||||
|
prog = new cl::Program(clcontext, source);
|
||||||
|
prog->build();
|
||||||
|
return *prog;
|
||||||
|
} catch (const cl::Error& err) {
|
||||||
|
const auto& dev = cldevices.front();
|
||||||
|
std::cout << "Build Log: " << prog->getBuildInfo<CL_PROGRAM_BUILD_LOG>(dev) << std::endl;
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void initSDL(SDL_Window **window, SDL_Renderer **renderer, SDL_Texture **texture)
|
||||||
|
{
|
||||||
|
/* Enable standard application logging */
|
||||||
|
SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, SDL_LOG_PRIORITY_INFO);
|
||||||
|
|
||||||
|
if (SDL_Init(SDL_INIT_VIDEO) < 0) {
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't initialize SDL: %s\n", SDL_GetError());
|
||||||
|
throw std::runtime_error("initSDL failed!");
|
||||||
|
}
|
||||||
|
|
||||||
|
*window = SDL_CreateWindow("Happy Mandelbrot",
|
||||||
|
SDL_WINDOWPOS_UNDEFINED,
|
||||||
|
SDL_WINDOWPOS_UNDEFINED,
|
||||||
|
WIN_DIM, WIN_DIM,
|
||||||
|
SDL_WINDOW_RESIZABLE);
|
||||||
|
if (!*window) {
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't set create window: %s\n", SDL_GetError());
|
||||||
|
throw std::runtime_error("initSDL failed!");
|
||||||
|
}
|
||||||
|
|
||||||
|
*renderer = SDL_CreateRenderer(*window, -1, 0);
|
||||||
|
if (!*renderer) {
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't set create renderer: %s\n", SDL_GetError());
|
||||||
|
throw std::runtime_error("initSDL failed!");
|
||||||
|
}
|
||||||
|
|
||||||
|
SDL_GL_SetSwapInterval(0);
|
||||||
|
|
||||||
|
*texture = SDL_CreateTexture(*renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, WIN_DIM, WIN_DIM);
|
||||||
|
if (!*texture) {
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't set create texture: %s\n", SDL_GetError());
|
||||||
|
throw std::runtime_error("initSDL failed!");
|
||||||
|
}
|
||||||
|
|
||||||
|
atexit(SDL_Quit);
|
||||||
|
}
|
||||||
|
|
||||||
|
void threadFpsMonitor(MandelbrotState& Mandelbrot)
|
||||||
|
{
|
||||||
|
while (!done) {
|
||||||
|
std::cout << "Rendered FPS: " << fps.load() << ", Z: " << (double)Mandelbrot.zoom() << std::endl;
|
||||||
|
fps.store(0);
|
||||||
|
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void threadEventMonitor(MandelbrotState& Mandelbrot)
|
||||||
|
{
|
||||||
|
Float zfactor (1.03);
|
||||||
|
Float zooming (1);
|
||||||
|
Complex newoffset;
|
||||||
|
|
||||||
|
while (!done) {
|
||||||
|
auto next = std::chrono::steady_clock::now() + std::chrono::milliseconds(17);
|
||||||
|
|
||||||
|
for (SDL_Event event; SDL_PollEvent(&event);) {
|
||||||
|
switch (event.type) {
|
||||||
|
case SDL_KEYDOWN:
|
||||||
|
if (event.key.keysym.sym == SDLK_ESCAPE)
|
||||||
|
done = true;
|
||||||
|
break;
|
||||||
|
case SDL_MOUSEBUTTONDOWN:
|
||||||
|
// Calculate desired "normal" change from origin. -0.5 to 0.5.
|
||||||
|
// Zoom scales this result later.
|
||||||
|
newoffset = Complex {
|
||||||
|
Float(event.button.x / (double)WIN_DIM) - Float(0.5),
|
||||||
|
Float(event.button.y / (double)WIN_DIM) - Float(0.5)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Zoom in with left button, zoom out with right.
|
||||||
|
if (event.button.button == SDL_BUTTON_LEFT)
|
||||||
|
zooming *= Float(1) - (zfactor - Float(1));
|
||||||
|
else if (event.button.button == SDL_BUTTON_RIGHT)
|
||||||
|
zooming = zfactor;
|
||||||
|
break;
|
||||||
|
case SDL_MOUSEBUTTONUP:
|
||||||
|
// Stop moving.
|
||||||
|
zooming = 1;
|
||||||
|
newoffset.real = 0;
|
||||||
|
newoffset.imag = 0;
|
||||||
|
break;
|
||||||
|
case SDL_MOUSEMOTION:
|
||||||
|
// Update offset on mouse movement, so zoom continues towards where the user expects.
|
||||||
|
if (zooming != Float(1)) {
|
||||||
|
newoffset.real += Float(event.motion.xrel / (double)WIN_DIM);
|
||||||
|
newoffset.imag += Float(event.motion.yrel / (double)WIN_DIM);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SDL_MOUSEWHEEL:
|
||||||
|
// Increase zoom factor with scroll up, or decrease with scroll down.
|
||||||
|
zfactor = std::max(Float(1), zfactor + Float(0.005) * Float(event.wheel.y));
|
||||||
|
|
||||||
|
// Update zoom speed if zfactor is changed while zooming.
|
||||||
|
if (zooming != Float(1)) {
|
||||||
|
if (zooming < Float(1))
|
||||||
|
zooming *= Float(1) - (zfactor - Float(1));
|
||||||
|
else
|
||||||
|
zooming = zfactor;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SDL_QUIT:
|
||||||
|
done = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef BENCHMARK
|
||||||
|
// Constant zoom on a constant point.
|
||||||
|
bool b = Mandelbrot.moveOriginAndZoomBy({}, Float(1) - (zfactor - Float(1)));
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(b ? 8 : 1)); // max 125fps
|
||||||
|
#else
|
||||||
|
if (zooming != Float(1) || newoffset.real != Float(0) || newoffset.imag != Float(0)) {
|
||||||
|
// Scale new offset according to zoom.
|
||||||
|
const auto zoom = Mandelbrot.zoom();
|
||||||
|
Complex c = newoffset;
|
||||||
|
c.real *= zoom * (Float(1) - zooming);
|
||||||
|
c.imag *= zoom * (Float(1) - zooming);
|
||||||
|
|
||||||
|
// Don't sleep as long if a recalculation is already running, so
|
||||||
|
// we can get our new request in sooner once recalculation completes.
|
||||||
|
if (Mandelbrot.moveOriginAndZoomBy(c, zooming))
|
||||||
|
std::this_thread::sleep_until(next);
|
||||||
|
else
|
||||||
|
std::this_thread::sleep_for(std::chrono::microseconds(50));
|
||||||
|
} else {
|
||||||
|
std::this_thread::sleep_until(next);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MandelbrotState::MandelbrotState():
|
||||||
|
m_calcing(false),
|
||||||
|
m_max_iterations(MIN_MAX_ITERATIONS),
|
||||||
|
m_zoom(MIN_ZOOM)
|
||||||
|
{
|
||||||
|
#ifndef BENCHMARK
|
||||||
|
// This is a good starting point.
|
||||||
|
m_origin.real = -1;
|
||||||
|
m_origin.imag = 0;
|
||||||
|
#else
|
||||||
|
m_origin.real = -1.5;
|
||||||
|
m_origin.imag = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Spawn calcThread to begin receiving recalculation requests.
|
||||||
|
m_recalc.clear();
|
||||||
|
m_calc_thread = std::thread([this] { calcThread(); });
|
||||||
|
}
|
||||||
|
|
||||||
|
MandelbrotState::~MandelbrotState() {
|
||||||
|
// calcThread is likely waiting for m_recalc to become true.
|
||||||
|
m_recalc.test_and_set();
|
||||||
|
m_recalc.notify_all();
|
||||||
|
|
||||||
|
// Bring calcThread in if it's still running.
|
||||||
|
if (m_calc_thread.joinable())
|
||||||
|
m_calc_thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
void MandelbrotState::initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname)
|
||||||
|
{
|
||||||
|
m_cl_kernel.reset(new cl::Kernel(clprogram, "mandelbrot_calc"));
|
||||||
|
m_cl_queue.reset(new cl::CommandQueue(clcontext));
|
||||||
|
m_cl_input.reset(new cl::Buffer(clcontext, CL_MEM_READ_ONLY, WIN_DIM * WIN_DIM * sizeof(Complex)));
|
||||||
|
m_cl_output.reset(new cl::Buffer(clcontext, CL_MEM_WRITE_ONLY, WIN_DIM * WIN_DIM * sizeof(uint32_t)));
|
||||||
|
|
||||||
|
// These kernel parameters do not change throughout execution.
|
||||||
|
// Max iteration count does, and is set with each kernel execution.
|
||||||
|
m_cl_kernel->setArg(0, *m_cl_input);
|
||||||
|
m_cl_kernel->setArg(1, *m_cl_output);
|
||||||
|
}
|
||||||
|
|
||||||
|
Float MandelbrotState::zoom() const {
|
||||||
|
return m_zoom;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MandelbrotState::moveOriginAndZoomBy(Complex c, Float z) {
|
||||||
|
if (!m_calcing) {
|
||||||
|
m_origin.real += c.real;
|
||||||
|
m_origin.imag += c.imag;
|
||||||
|
m_zoom = std::min(MIN_ZOOM, m_zoom * z);
|
||||||
|
m_max_iterations = std::max(MIN_MAX_ITERATIONS, calculateMaxIterations(m_zoom));
|
||||||
|
|
||||||
|
scheduleRecalculation();
|
||||||
|
}
|
||||||
|
|
||||||
|
return !m_calcing;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MandelbrotState::intoTexture(SDL_Texture *texture) {
|
||||||
|
if (m_calcing) {
|
||||||
|
// Wait for the calculations to complete.
|
||||||
|
m_recalc.wait(true);
|
||||||
|
|
||||||
|
// Lock the SDL texture, then stream the OpenCL output into it.
|
||||||
|
void *dst;
|
||||||
|
int pitch;
|
||||||
|
SDL_LockTexture(texture, nullptr, &dst, &pitch);
|
||||||
|
m_cl_queue->enqueueReadBuffer(*m_cl_output, CL_TRUE, 0, WIN_DIM * WIN_DIM * sizeof(uint32_t), dst);
|
||||||
|
SDL_UnlockTexture(texture);
|
||||||
|
|
||||||
|
std::chrono::duration<double> diff =
|
||||||
|
std::chrono::high_resolution_clock::now() - clTime;
|
||||||
|
std::cout << "Time: " << diff.count() << "s" << std::endl;
|
||||||
|
|
||||||
|
// Allow user input to modify origin and zoom,
|
||||||
|
// also allowing the next calculation to be scheduled.
|
||||||
|
m_calcing = false;
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MandelbrotState::scheduleRecalculation() {
|
||||||
|
if (!m_calcing) {
|
||||||
|
// Tell calcThread that it's time to recalculate.
|
||||||
|
m_recalc.test_and_set();
|
||||||
|
m_recalc.notify_one();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MandelbrotState::calcThread() {
|
||||||
|
while (!done) {
|
||||||
|
// Wait for a recalculation to be requested, indicated by m_recalc becoming true.
|
||||||
|
m_recalc.wait(false);
|
||||||
|
calculateBitmap();
|
||||||
|
|
||||||
|
// Finished. Clear m_recalc, and notify the render thread (checked at MandelbrotState::intoTexture).
|
||||||
|
m_recalc.clear();
|
||||||
|
m_recalc.notify_one();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t MandelbrotState::calculateMaxIterations(Float zoom)
|
||||||
|
{
|
||||||
|
// The max iteration count will increase linearly with zoom factor.
|
||||||
|
// TODO Does the result increase too quickly?
|
||||||
|
|
||||||
|
return MIN_MAX_ITERATIONS * (1.5 - std::log(static_cast<double>(zoom)) / std::log((double)MIN_ZOOM));
|
||||||
|
}
|
||||||
|
|
||||||
|
void MandelbrotState::calculateBitmap()
|
||||||
|
{
|
||||||
|
static std::array<Complex, WIN_DIM * WIN_DIM> points;
|
||||||
|
static std::array<Float, WIN_DIM> row;
|
||||||
|
static std::array<Float, WIN_DIM> col;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Generate a list of every Complex coordinate that needs to be calculated.
|
||||||
|
|
||||||
|
const Float dz (m_zoom * Float(1.0 / WIN_DIM));
|
||||||
|
Complex pt;
|
||||||
|
pt.real = m_origin.real - m_zoom * Float(0.5);
|
||||||
|
pt.imag = m_origin.imag - m_zoom * Float(0.5);
|
||||||
|
|
||||||
|
{
|
||||||
|
auto p = row.begin();
|
||||||
|
Float r = pt.real;
|
||||||
|
for (int i = 0; i < WIN_DIM; ++i) {
|
||||||
|
*p++ = r;
|
||||||
|
r += dz;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto p = col.begin();
|
||||||
|
Float r = pt.imag;
|
||||||
|
for (int i = 0; i < WIN_DIM; ++i) {
|
||||||
|
*p++ = r;
|
||||||
|
r += dz;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ptr = points.begin();
|
||||||
|
for (int j = 0; j < WIN_DIM; ++j) {
|
||||||
|
Complex c;
|
||||||
|
c.imag = col[j];
|
||||||
|
|
||||||
|
for (int i = 0; i < WIN_DIM; ++i) {
|
||||||
|
c.real = row[i];
|
||||||
|
*ptr++ = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Pass the list into the OpenCL kernel, and begin execution.
|
||||||
|
|
||||||
|
while (m_calcing)
|
||||||
|
std::this_thread::yield();
|
||||||
|
|
||||||
|
m_calcing = true;
|
||||||
|
|
||||||
|
clTime = std::chrono::high_resolution_clock::now();
|
||||||
|
m_cl_kernel->setArg(2, m_max_iterations);
|
||||||
|
m_cl_queue->enqueueWriteBuffer(*m_cl_input, CL_TRUE, 0, points.size() * sizeof(Complex), points.data());
|
||||||
|
m_cl_queue->enqueueNDRangeKernel(*m_cl_kernel, cl::NullRange, cl::NDRange(points.size()), cl::NullRange);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,2 @@
|
|||||||
|
all: main.cpp
|
||||||
|
g++ main.cpp -std=c++20 -lSDL2 -lpthread -lOpenCL -g3 -ggdb -O0
|
@ -0,0 +1,494 @@
|
|||||||
|
// fractal - OpenCL-accelerated Mandelbrot renderer.
|
||||||
|
// Written by Clyne Sullivan.
|
||||||
|
|
||||||
|
// If defined, program auto-zooms and measures runtime.
|
||||||
|
//#define BENCHMARK
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <chrono>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <iostream>
|
||||||
|
#include <memory>
|
||||||
|
#include <sstream>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#define CL_HPP_TARGET_OPENCL_VERSION (300)
|
||||||
|
#define CL_HPP_ENABLE_EXCEPTIONS (1)
|
||||||
|
#include <CL/opencl.hpp>
|
||||||
|
#include <SDL2/SDL.h>
|
||||||
|
|
||||||
|
// The "Float" type determines what data type will store numbers for calculations.
|
||||||
|
// Can use native float or double; or, a custom Q4.124 fixed-point data type.
|
||||||
|
// If fixed point, use "*_r128.c" OpenCL kernel. Otherwise, use regular kernel.
|
||||||
|
|
||||||
|
#define R128_IMPLEMENTATION
|
||||||
|
#include "r128.h"
|
||||||
|
using Float = R128;
|
||||||
|
|
||||||
|
//using Float = double;
|
||||||
|
|
||||||
|
// Sets the window's dimensions. The window is square.
|
||||||
|
constexpr static int WIN_DIM = 800;
|
||||||
|
|
||||||
|
// Not allowed to calculate less iterations than this.
|
||||||
|
constexpr uint32_t MIN_MAX_ITERATIONS = 70;
|
||||||
|
// Not allowed to zoom out farther than this.
|
||||||
|
static const Float MIN_ZOOM (4.0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A packed Float-pair for storing complex numbers.
|
||||||
|
* Must match a vector type for OpenCL.
|
||||||
|
*/
|
||||||
|
struct Complex {
|
||||||
|
Float real = Float(0);
|
||||||
|
Float imag = Float(0);
|
||||||
|
} __attribute__ ((packed));
|
||||||
|
|
||||||
|
class MandelbrotState
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Initializes data and spawns the calculation thread.
|
||||||
|
MandelbrotState();
|
||||||
|
// Joins threads.
|
||||||
|
~MandelbrotState();
|
||||||
|
|
||||||
|
// Prepares to use the given OpenCL kernel for calculations.
|
||||||
|
void initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname);
|
||||||
|
|
||||||
|
Float zoom() const;
|
||||||
|
|
||||||
|
// Offsets the view's origin by the given Complex, and changes zoom by the given factor.
|
||||||
|
// Returns true if a new calculation has been scheduled (false if one is in progress).
|
||||||
|
bool moveOriginAndZoomBy(Complex c, Float z);
|
||||||
|
|
||||||
|
// Outputs the results of the latest calculation into the given SDL texture.
|
||||||
|
// Returns true if successful.
|
||||||
|
// Returns false if a calculation is in progress. The texture is not updated.
|
||||||
|
bool intoTexture(SDL_Texture *texture);
|
||||||
|
// Requests the initiation of a new calculation.
|
||||||
|
void scheduleRecalculation();
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::thread m_calc_thread;
|
||||||
|
std::atomic_bool m_calcing; // If false, we're ready for a new calculation.
|
||||||
|
std::atomic_flag m_recalc; // Tell calcThread to recalc, or tell main thread recalcing is done.
|
||||||
|
uint32_t m_max_iterations;
|
||||||
|
Float m_zoom;
|
||||||
|
Complex m_origin;
|
||||||
|
|
||||||
|
std::unique_ptr<cl::Kernel> m_cl_kernel;
|
||||||
|
std::unique_ptr<cl::CommandQueue> m_cl_queue;
|
||||||
|
std::unique_ptr<cl::Buffer> m_cl_input;
|
||||||
|
std::unique_ptr<cl::Buffer> m_cl_output;
|
||||||
|
|
||||||
|
// Enters main loop of calcThread.
|
||||||
|
void calcThread();
|
||||||
|
// Calls the OpenCL kernel to compute new results.
|
||||||
|
void calculateBitmap();
|
||||||
|
|
||||||
|
// Determine the max iteration count based on zoom factor.
|
||||||
|
static uint32_t calculateMaxIterations(Float zoom);
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool done = false;
|
||||||
|
static std::atomic_int fps = 0;
|
||||||
|
static std::chrono::time_point<std::chrono::high_resolution_clock> clTime;
|
||||||
|
|
||||||
|
static cl::Context initCLContext();
|
||||||
|
static cl::Program initCLProgram(cl::Context&, const char * const);
|
||||||
|
static void initSDL(SDL_Window **, SDL_Renderer **, SDL_Texture **);
|
||||||
|
static void threadFpsMonitor(MandelbrotState&);
|
||||||
|
static void threadEventMonitor(MandelbrotState&);
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
MandelbrotState Mandelbrot;
|
||||||
|
SDL_Window *window;
|
||||||
|
SDL_Renderer *renderer;
|
||||||
|
SDL_Texture *MandelbrotTexture;
|
||||||
|
|
||||||
|
initSDL(&window, &renderer, &MandelbrotTexture);
|
||||||
|
|
||||||
|
std::ifstream clSource ("opencl/mandelbrot_calc_r128.c");
|
||||||
|
if (!clSource.good())
|
||||||
|
throw std::runtime_error("Failed to open OpenCL kernel!");
|
||||||
|
|
||||||
|
// Dump OpenCL kernel into a std::string.
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << clSource.rdbuf();
|
||||||
|
std::string clSourceStr (oss.str());
|
||||||
|
|
||||||
|
auto clContext = initCLContext();
|
||||||
|
auto clProgram = initCLProgram(clContext, clSourceStr.data());
|
||||||
|
Mandelbrot.initKernel(clContext, clProgram, "mandelbrot_calc");
|
||||||
|
|
||||||
|
// Initiate first calculation so something appears on the screen.
|
||||||
|
Mandelbrot.scheduleRecalculation();
|
||||||
|
|
||||||
|
std::thread fpsMonitor ([&Mandelbrot] { threadFpsMonitor(Mandelbrot); });
|
||||||
|
std::thread eventMonitor ([&Mandelbrot] { threadEventMonitor(Mandelbrot); });
|
||||||
|
|
||||||
|
#ifdef BENCHMARK
|
||||||
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
while (!done) {
|
||||||
|
if (Mandelbrot.intoTexture(MandelbrotTexture)) {
|
||||||
|
SDL_RenderClear(renderer);
|
||||||
|
SDL_RenderCopy(renderer, MandelbrotTexture, nullptr, nullptr);
|
||||||
|
SDL_RenderPresent(renderer);
|
||||||
|
|
||||||
|
++fps;
|
||||||
|
} else {
|
||||||
|
std::this_thread::sleep_for(std::chrono::microseconds(10));
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef BENCHMARK
|
||||||
|
if (Mandelbrot.zoom() < Float(1e-5))
|
||||||
|
done = true;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef BENCHMARK
|
||||||
|
std::chrono::duration<double> seconds = std::chrono::high_resolution_clock::now() - start;
|
||||||
|
std::cout << "Calculations took: " << seconds.count() << "s" << std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
eventMonitor.join();
|
||||||
|
fpsMonitor.join();
|
||||||
|
SDL_DestroyRenderer(renderer);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static cl::Platform clplatform;
|
||||||
|
static std::vector<cl::Device> cldevices;
|
||||||
|
|
||||||
|
cl::Context initCLContext()
|
||||||
|
{
|
||||||
|
clplatform = cl::Platform::getDefault();
|
||||||
|
clplatform.getDevices(CL_DEVICE_TYPE_GPU, &cldevices);
|
||||||
|
|
||||||
|
return cl::Context(cldevices.front());
|
||||||
|
}
|
||||||
|
|
||||||
|
cl::Program initCLProgram(cl::Context& clcontext, const char * const source)
|
||||||
|
{
|
||||||
|
cl::Program *prog;
|
||||||
|
|
||||||
|
try {
|
||||||
|
prog = new cl::Program(clcontext, source);
|
||||||
|
prog->build();
|
||||||
|
return *prog;
|
||||||
|
} catch (const cl::Error& err) {
|
||||||
|
const auto& dev = cldevices.front();
|
||||||
|
std::cout << "Build Log: " << prog->getBuildInfo<CL_PROGRAM_BUILD_LOG>(dev) << std::endl;
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void initSDL(SDL_Window **window, SDL_Renderer **renderer, SDL_Texture **texture)
|
||||||
|
{
|
||||||
|
/* Enable standard application logging */
|
||||||
|
SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, SDL_LOG_PRIORITY_INFO);
|
||||||
|
|
||||||
|
if (SDL_Init(SDL_INIT_VIDEO) < 0) {
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't initialize SDL: %s\n", SDL_GetError());
|
||||||
|
throw std::runtime_error("initSDL failed!");
|
||||||
|
}
|
||||||
|
|
||||||
|
*window = SDL_CreateWindow("Happy Mandelbrot",
|
||||||
|
SDL_WINDOWPOS_UNDEFINED,
|
||||||
|
SDL_WINDOWPOS_UNDEFINED,
|
||||||
|
WIN_DIM, WIN_DIM,
|
||||||
|
SDL_WINDOW_RESIZABLE);
|
||||||
|
if (!*window) {
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't set create window: %s\n", SDL_GetError());
|
||||||
|
throw std::runtime_error("initSDL failed!");
|
||||||
|
}
|
||||||
|
|
||||||
|
*renderer = SDL_CreateRenderer(*window, -1, 0);
|
||||||
|
if (!*renderer) {
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't set create renderer: %s\n", SDL_GetError());
|
||||||
|
throw std::runtime_error("initSDL failed!");
|
||||||
|
}
|
||||||
|
|
||||||
|
SDL_GL_SetSwapInterval(0);
|
||||||
|
|
||||||
|
*texture = SDL_CreateTexture(*renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, WIN_DIM, WIN_DIM);
|
||||||
|
if (!*texture) {
|
||||||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't set create texture: %s\n", SDL_GetError());
|
||||||
|
throw std::runtime_error("initSDL failed!");
|
||||||
|
}
|
||||||
|
|
||||||
|
atexit(SDL_Quit);
|
||||||
|
}
|
||||||
|
|
||||||
|
void threadFpsMonitor(MandelbrotState& Mandelbrot)
|
||||||
|
{
|
||||||
|
while (!done) {
|
||||||
|
std::cout << "Rendered FPS: " << fps.load() << ", Z: " << (double)Mandelbrot.zoom() << std::endl;
|
||||||
|
fps.store(0);
|
||||||
|
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void threadEventMonitor(MandelbrotState& Mandelbrot)
|
||||||
|
{
|
||||||
|
Float zfactor (1.03);
|
||||||
|
Float zooming (1);
|
||||||
|
Complex newoffset;
|
||||||
|
|
||||||
|
while (!done) {
|
||||||
|
auto next = std::chrono::steady_clock::now() + std::chrono::milliseconds(17);
|
||||||
|
|
||||||
|
for (SDL_Event event; SDL_PollEvent(&event);) {
|
||||||
|
switch (event.type) {
|
||||||
|
case SDL_KEYDOWN:
|
||||||
|
if (event.key.keysym.sym == SDLK_ESCAPE)
|
||||||
|
done = true;
|
||||||
|
break;
|
||||||
|
case SDL_MOUSEBUTTONDOWN:
|
||||||
|
// Calculate desired "normal" change from origin. -0.5 to 0.5.
|
||||||
|
// Zoom scales this result later.
|
||||||
|
newoffset = Complex {
|
||||||
|
Float(event.button.x / (double)WIN_DIM) - Float(0.5),
|
||||||
|
Float(event.button.y / (double)WIN_DIM) - Float(0.5)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Zoom in with left button, zoom out with right.
|
||||||
|
if (event.button.button == SDL_BUTTON_LEFT)
|
||||||
|
zooming *= Float(1) - (zfactor - Float(1));
|
||||||
|
else if (event.button.button == SDL_BUTTON_RIGHT)
|
||||||
|
zooming = zfactor;
|
||||||
|
break;
|
||||||
|
case SDL_MOUSEBUTTONUP:
|
||||||
|
// Stop moving.
|
||||||
|
zooming = 1;
|
||||||
|
newoffset.real = 0;
|
||||||
|
newoffset.imag = 0;
|
||||||
|
break;
|
||||||
|
case SDL_MOUSEMOTION:
|
||||||
|
// Update offset on mouse movement, so zoom continues towards where the user expects.
|
||||||
|
if (zooming != Float(1)) {
|
||||||
|
newoffset.real += Float(event.motion.xrel / (double)WIN_DIM);
|
||||||
|
newoffset.imag += Float(event.motion.yrel / (double)WIN_DIM);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SDL_MOUSEWHEEL:
|
||||||
|
// Increase zoom factor with scroll up, or decrease with scroll down.
|
||||||
|
zfactor = std::max(Float(1), zfactor + Float(0.005) * Float(event.wheel.y));
|
||||||
|
|
||||||
|
// Update zoom speed if zfactor is changed while zooming.
|
||||||
|
if (zooming != Float(1)) {
|
||||||
|
if (zooming < Float(1))
|
||||||
|
zooming *= Float(1) - (zfactor - Float(1));
|
||||||
|
else
|
||||||
|
zooming = zfactor;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case SDL_QUIT:
|
||||||
|
done = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef BENCHMARK
|
||||||
|
// Constant zoom on a constant point.
|
||||||
|
bool b = Mandelbrot.moveOriginAndZoomBy({}, Float(1) - (zfactor - Float(1)));
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds(b ? 8 : 1)); // max 125fps
|
||||||
|
#else
|
||||||
|
if (zooming != Float(1) || newoffset.real != Float(0) || newoffset.imag != Float(0)) {
|
||||||
|
// Scale new offset according to zoom.
|
||||||
|
const auto zoom = Mandelbrot.zoom();
|
||||||
|
Complex c = newoffset;
|
||||||
|
c.real *= zoom * (Float(1) - zooming);
|
||||||
|
c.imag *= zoom * (Float(1) - zooming);
|
||||||
|
|
||||||
|
// Don't sleep as long if a recalculation is already running, so
|
||||||
|
// we can get our new request in sooner once recalculation completes.
|
||||||
|
if (Mandelbrot.moveOriginAndZoomBy(c, zooming))
|
||||||
|
std::this_thread::sleep_until(next);
|
||||||
|
else
|
||||||
|
std::this_thread::sleep_for(std::chrono::microseconds(50));
|
||||||
|
} else {
|
||||||
|
std::this_thread::sleep_until(next);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MandelbrotState::MandelbrotState():
|
||||||
|
m_calcing(false),
|
||||||
|
m_max_iterations(MIN_MAX_ITERATIONS),
|
||||||
|
m_zoom(MIN_ZOOM)
|
||||||
|
{
|
||||||
|
#ifndef BENCHMARK
|
||||||
|
// This is a good starting point.
|
||||||
|
m_origin.real = -1;
|
||||||
|
m_origin.imag = 0;
|
||||||
|
#else
|
||||||
|
m_origin.real = -1.5;
|
||||||
|
m_origin.imag = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Spawn calcThread to begin receiving recalculation requests.
|
||||||
|
m_recalc.clear();
|
||||||
|
m_calc_thread = std::thread([this] { calcThread(); });
|
||||||
|
}
|
||||||
|
|
||||||
|
MandelbrotState::~MandelbrotState() {
|
||||||
|
// calcThread is likely waiting for m_recalc to become true.
|
||||||
|
m_recalc.test_and_set();
|
||||||
|
m_recalc.notify_all();
|
||||||
|
|
||||||
|
// Bring calcThread in if it's still running.
|
||||||
|
if (m_calc_thread.joinable())
|
||||||
|
m_calc_thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
void MandelbrotState::initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname)
|
||||||
|
{
|
||||||
|
m_cl_kernel.reset(new cl::Kernel(clprogram, "mandelbrot_calc"));
|
||||||
|
m_cl_queue.reset(new cl::CommandQueue(clcontext));
|
||||||
|
m_cl_input.reset(new cl::Buffer(clcontext, CL_MEM_READ_ONLY, WIN_DIM * WIN_DIM * sizeof(Complex)));
|
||||||
|
m_cl_output.reset(new cl::Buffer(clcontext, CL_MEM_WRITE_ONLY, WIN_DIM * WIN_DIM * sizeof(uint32_t)));
|
||||||
|
|
||||||
|
// These kernel parameters do not change throughout execution.
|
||||||
|
// Max iteration count does, and is set with each kernel execution.
|
||||||
|
m_cl_kernel->setArg(0, *m_cl_input);
|
||||||
|
m_cl_kernel->setArg(1, *m_cl_output);
|
||||||
|
}
|
||||||
|
|
||||||
|
Float MandelbrotState::zoom() const {
|
||||||
|
return m_zoom;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MandelbrotState::moveOriginAndZoomBy(Complex c, Float z) {
|
||||||
|
if (!m_calcing) {
|
||||||
|
m_origin.real += c.real;
|
||||||
|
m_origin.imag += c.imag;
|
||||||
|
m_zoom = std::min(MIN_ZOOM, m_zoom * z);
|
||||||
|
m_max_iterations = std::max(MIN_MAX_ITERATIONS, calculateMaxIterations(m_zoom));
|
||||||
|
|
||||||
|
scheduleRecalculation();
|
||||||
|
}
|
||||||
|
|
||||||
|
return !m_calcing;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MandelbrotState::intoTexture(SDL_Texture *texture) {
|
||||||
|
if (m_calcing) {
|
||||||
|
// Wait for the calculations to complete.
|
||||||
|
m_recalc.wait(true);
|
||||||
|
|
||||||
|
// Lock the SDL texture, then stream the OpenCL output into it.
|
||||||
|
void *dst;
|
||||||
|
int pitch;
|
||||||
|
SDL_LockTexture(texture, nullptr, &dst, &pitch);
|
||||||
|
m_cl_queue->enqueueReadBuffer(*m_cl_output, CL_TRUE, 0, WIN_DIM * WIN_DIM * sizeof(uint32_t), dst);
|
||||||
|
SDL_UnlockTexture(texture);
|
||||||
|
|
||||||
|
std::chrono::duration<double> diff =
|
||||||
|
std::chrono::high_resolution_clock::now() - clTime;
|
||||||
|
std::cout << "Time: " << diff.count() << "s" << std::endl;
|
||||||
|
|
||||||
|
// Allow user input to modify origin and zoom,
|
||||||
|
// also allowing the next calculation to be scheduled.
|
||||||
|
m_calcing = false;
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MandelbrotState::scheduleRecalculation() {
|
||||||
|
if (!m_calcing) {
|
||||||
|
// Tell calcThread that it's time to recalculate.
|
||||||
|
m_recalc.test_and_set();
|
||||||
|
m_recalc.notify_one();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void MandelbrotState::calcThread() {
|
||||||
|
while (!done) {
|
||||||
|
// Wait for a recalculation to be requested, indicated by m_recalc becoming true.
|
||||||
|
m_recalc.wait(false);
|
||||||
|
calculateBitmap();
|
||||||
|
|
||||||
|
// Finished. Clear m_recalc, and notify the render thread (checked at MandelbrotState::intoTexture).
|
||||||
|
m_recalc.clear();
|
||||||
|
m_recalc.notify_one();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t MandelbrotState::calculateMaxIterations(Float zoom)
|
||||||
|
{
|
||||||
|
// The max iteration count will increase linearly with zoom factor.
|
||||||
|
// TODO Does the result increase too quickly?
|
||||||
|
|
||||||
|
return MIN_MAX_ITERATIONS * (1.5 - std::log(static_cast<double>(zoom)) / std::log((double)MIN_ZOOM));
|
||||||
|
}
|
||||||
|
|
||||||
|
void MandelbrotState::calculateBitmap()
|
||||||
|
{
|
||||||
|
static std::array<Complex, WIN_DIM * WIN_DIM> points;
|
||||||
|
static std::array<Float, WIN_DIM> row;
|
||||||
|
static std::array<Float, WIN_DIM> col;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Generate a list of every Complex coordinate that needs to be calculated.
|
||||||
|
|
||||||
|
const Float dz (m_zoom * Float(1.0 / WIN_DIM));
|
||||||
|
Complex pt;
|
||||||
|
pt.real = m_origin.real - m_zoom * Float(0.5);
|
||||||
|
pt.imag = m_origin.imag - m_zoom * Float(0.5);
|
||||||
|
|
||||||
|
{
|
||||||
|
auto p = row.begin();
|
||||||
|
Float r = pt.real;
|
||||||
|
for (int i = 0; i < WIN_DIM; ++i) {
|
||||||
|
*p++ = r;
|
||||||
|
r += dz;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto p = col.begin();
|
||||||
|
Float r = pt.imag;
|
||||||
|
for (int i = 0; i < WIN_DIM; ++i) {
|
||||||
|
*p++ = r;
|
||||||
|
r += dz;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto ptr = points.begin();
|
||||||
|
for (int j = 0; j < WIN_DIM; ++j) {
|
||||||
|
Complex c;
|
||||||
|
c.imag = col[j];
|
||||||
|
|
||||||
|
for (int i = 0; i < WIN_DIM; ++i) {
|
||||||
|
c.real = row[i];
|
||||||
|
*ptr++ = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Pass the list into the OpenCL kernel, and begin execution.
|
||||||
|
|
||||||
|
while (m_calcing)
|
||||||
|
std::this_thread::yield();
|
||||||
|
|
||||||
|
m_calcing = true;
|
||||||
|
|
||||||
|
clTime = std::chrono::high_resolution_clock::now();
|
||||||
|
m_cl_kernel->setArg(2, m_max_iterations);
|
||||||
|
m_cl_queue->enqueueWriteBuffer(*m_cl_input, CL_TRUE, 0, points.size() * sizeof(Complex), points.data());
|
||||||
|
m_cl_queue->enqueueNDRangeKernel(*m_cl_kernel, cl::NullRange, cl::NDRange(points.size()), cl::NullRange);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,35 @@
|
|||||||
|
__kernel void mandelbrot_calc(const __global double2 *c_pt,
|
||||||
|
__global unsigned int *out_it,
|
||||||
|
const unsigned int max_iterations)
|
||||||
|
{
|
||||||
|
const int id = get_global_id(0);
|
||||||
|
const double2 opt = c_pt[id];
|
||||||
|
|
||||||
|
double2 pt = opt;
|
||||||
|
double tmp = pt.x * pt.y;
|
||||||
|
unsigned int iterations = 0;
|
||||||
|
|
||||||
|
double q = (pt.x - 0.25) * (pt.x - 0.25) + pt.y * pt.y;
|
||||||
|
if (q * (q + (pt.x - 0.25)) <= 0.25 * pt.y * pt.y)
|
||||||
|
iterations = max_iterations;
|
||||||
|
|
||||||
|
while (iterations < max_iterations) {
|
||||||
|
pt *= pt;
|
||||||
|
|
||||||
|
if (pt.x + pt.y > 4.0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
pt.x = pt.x - pt.y;
|
||||||
|
pt.y = 2 * tmp;
|
||||||
|
pt += opt;
|
||||||
|
tmp = pt.x * pt.y;
|
||||||
|
|
||||||
|
++iterations;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (iterations == max_iterations)
|
||||||
|
out_it[id] = 0;
|
||||||
|
else
|
||||||
|
out_it[id] = ((iterations & 0xFF) << 16) | ((iterations & 0x07) << 6);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,156 @@
|
|||||||
|
inline ulong2 r128Add(const ulong2 a, const ulong2 b)
|
||||||
|
{
|
||||||
|
ulong2 dst;
|
||||||
|
dst.lo = a.lo + b.lo;
|
||||||
|
dst.hi = a.hi + b.hi + (dst.lo < a.lo);
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ulong2 r128Sub(const ulong2 a, const ulong2 b)
|
||||||
|
{
|
||||||
|
ulong2 dst;
|
||||||
|
dst.lo = a.lo - b.lo;
|
||||||
|
dst.hi = a.hi - b.hi - (dst.lo > a.lo);
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ulong2 r128__umul128(ulong a, ulong b)
|
||||||
|
{
|
||||||
|
ulong alo = a & 0xFFFFFFFF;
|
||||||
|
ulong ahi = a >> 32;
|
||||||
|
ulong blo = b & 0xFFFFFFFF;
|
||||||
|
ulong bhi = b >> 32;
|
||||||
|
ulong p0, p1, p2, p3;
|
||||||
|
|
||||||
|
p0 = alo * blo;
|
||||||
|
p1 = alo * bhi;
|
||||||
|
p2 = ahi * blo;
|
||||||
|
p3 = ahi * bhi;
|
||||||
|
|
||||||
|
ulong2 dst;
|
||||||
|
ulong carry;
|
||||||
|
carry = ((p1 & 0xFFFFFFFF) + (p2 & 0xFFFFFFFF) + (p0 >> 32)) >> 32;
|
||||||
|
|
||||||
|
dst.lo = p0 + ((p1 + p2) << 32);
|
||||||
|
dst.hi = p3 + ((uint)(p1 >> 32) + (uint)(p2 >> 32)) + carry;
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ulong r128__umul128Hi(ulong a, ulong b)
|
||||||
|
{
|
||||||
|
ulong alo = a & 0xFFFFFFFF;
|
||||||
|
ulong ahi = a >> 32;
|
||||||
|
ulong blo = b & 0xFFFFFFFF;
|
||||||
|
ulong bhi = b >> 32;
|
||||||
|
ulong p0, p1, p2, p3;
|
||||||
|
|
||||||
|
p0 = alo * blo;
|
||||||
|
p1 = alo * bhi;
|
||||||
|
p2 = ahi * blo;
|
||||||
|
p3 = ahi * bhi;
|
||||||
|
|
||||||
|
ulong carry = ((p1 & 0xFFFFFFFF) + (p2 & 0xFFFFFFFF) + (p0 >> 32)) >> 32;
|
||||||
|
return p3 + ((uint)(p1 >> 32) + (uint)(p2 >> 32)) + carry;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ulong2 r128Shr(const ulong2 src, int amount)
|
||||||
|
{
|
||||||
|
ulong2 r;
|
||||||
|
r.lo = (src.lo >> amount) | (src.hi << (64 - amount));
|
||||||
|
r.hi = src.hi >> amount;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ulong2 r128__umul(const ulong2 a, const ulong2 b)
|
||||||
|
{
|
||||||
|
ulong2 ahbl, albh, ahbh, sum;
|
||||||
|
|
||||||
|
sum.lo = r128__umul128Hi(a.lo, b.lo);
|
||||||
|
ahbl = r128__umul128(a.hi, b.lo);
|
||||||
|
albh = r128__umul128(a.lo, b.hi);
|
||||||
|
ahbh = r128__umul128(a.hi, b.hi);
|
||||||
|
|
||||||
|
ahbh = r128Shr(ahbh, 60);
|
||||||
|
sum.hi = ahbh.lo;
|
||||||
|
sum = r128Add(sum, ahbl);
|
||||||
|
sum = r128Add(sum, albh);
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline ulong2 r128Mul(const ulong2 a, const ulong2 b)
|
||||||
|
{
|
||||||
|
int sign = 0;
|
||||||
|
ulong2 ta, tb, tc;
|
||||||
|
|
||||||
|
ta = a;
|
||||||
|
tb = b;
|
||||||
|
|
||||||
|
if ((long)ta.hi < 0) {
|
||||||
|
if (ta.lo) {
|
||||||
|
ta.lo = ~ta.lo + 1;
|
||||||
|
ta.hi = ~ta.hi;
|
||||||
|
} else {
|
||||||
|
ta.lo = 0;
|
||||||
|
ta.hi = ~ta.hi + 1;
|
||||||
|
}
|
||||||
|
sign = !sign;
|
||||||
|
}
|
||||||
|
if ((long)tb.hi < 0) {
|
||||||
|
if (tb.lo) {
|
||||||
|
tb.lo = ~tb.lo + 1;
|
||||||
|
tb.hi = ~tb.hi;
|
||||||
|
} else {
|
||||||
|
tb.lo = 0;
|
||||||
|
tb.hi = ~tb.hi + 1;
|
||||||
|
}
|
||||||
|
sign = !sign;
|
||||||
|
}
|
||||||
|
|
||||||
|
tc = r128__umul(ta, tb);
|
||||||
|
|
||||||
|
if (sign) {
|
||||||
|
if (tc.lo) {
|
||||||
|
tc.lo = ~tc.lo + 1;
|
||||||
|
tc.hi = ~tc.hi;
|
||||||
|
} else {
|
||||||
|
tc.lo = 0;
|
||||||
|
tc.hi = ~tc.hi + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tc;
|
||||||
|
}
|
||||||
|
|
||||||
|
__kernel void mandelbrot_calc(const __global ulong4 *c_pt,
|
||||||
|
__global unsigned int *out_it,
|
||||||
|
const unsigned int max_iterations)
|
||||||
|
{
|
||||||
|
const int id = get_global_id(0);
|
||||||
|
const ulong4 opt = c_pt[id];
|
||||||
|
|
||||||
|
ulong4 pt = opt;
|
||||||
|
ulong2 tmp;
|
||||||
|
unsigned int iterations;
|
||||||
|
|
||||||
|
for (iterations = 0; iterations < max_iterations; ++iterations) {
|
||||||
|
tmp = r128Mul(pt.lo, pt.hi);
|
||||||
|
pt.lo = r128Mul(pt.lo, pt.lo);
|
||||||
|
pt.hi = r128Mul(pt.hi, pt.hi);
|
||||||
|
|
||||||
|
const ulong2 sum = r128Add(pt.lo, pt.hi);
|
||||||
|
if ((long)sum.hi >= 0x4000000000000000)
|
||||||
|
break;
|
||||||
|
|
||||||
|
pt.lo = r128Sub(pt.lo, pt.hi);
|
||||||
|
pt.hi = r128Add(tmp, tmp);
|
||||||
|
pt.lo = r128Add(pt.lo, opt.lo);
|
||||||
|
pt.hi = r128Add(pt.hi, opt.hi);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (iterations == max_iterations)
|
||||||
|
out_it[id] = 0;
|
||||||
|
else
|
||||||
|
out_it[id] = ((iterations & 0xFF) << 16) | ((iterations & 0x07) << 6);
|
||||||
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue