You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
495 lines
16 KiB
C++
495 lines
16 KiB
C++
2 years ago
|
// fractal - OpenCL-accelerated Mandelbrot renderer.
|
||
|
// Written by Clyne Sullivan.
|
||
|
|
||
|
// If defined, program auto-zooms and measures runtime.
|
||
|
//#define BENCHMARK
|
||
|
|
||
|
#include <atomic>
|
||
|
#include <chrono>
|
||
|
#include <cstdint>
|
||
|
#include <cstring>
|
||
|
#include <fstream>
|
||
|
#include <iomanip>
|
||
|
#include <iostream>
|
||
|
#include <memory>
|
||
|
#include <sstream>
|
||
|
#include <stdexcept>
|
||
|
#include <thread>
|
||
|
#include <vector>
|
||
|
|
||
|
#define CL_HPP_TARGET_OPENCL_VERSION (300)
|
||
|
#define CL_HPP_ENABLE_EXCEPTIONS (1)
|
||
|
#include <CL/opencl.hpp>
|
||
|
#include <SDL2/SDL.h>
|
||
|
|
||
|
// The "Float" type determines what data type will store numbers for calculations.
|
||
|
// Can use native float or double; or, a custom Q4.124 fixed-point data type.
|
||
|
// If fixed point, use "*_r128.c" OpenCL kernel. Otherwise, use regular kernel.
|
||
|
|
||
|
#define R128_IMPLEMENTATION
|
||
|
#include "r128.h"
|
||
|
using Float = R128;
|
||
|
|
||
|
//using Float = double;
|
||
|
|
||
|
// Sets the window's dimensions. The window is square.
|
||
|
constexpr static int WIN_DIM = 800;
|
||
|
|
||
|
// Not allowed to calculate less iterations than this.
|
||
|
constexpr uint32_t MIN_MAX_ITERATIONS = 70;
|
||
|
// Not allowed to zoom out farther than this.
|
||
|
static const Float MIN_ZOOM (4.0);
|
||
|
|
||
|
/**
|
||
|
* A packed Float-pair for storing complex numbers.
|
||
|
* Must match a vector type for OpenCL.
|
||
|
*/
|
||
|
struct Complex {
|
||
|
Float real = Float(0);
|
||
|
Float imag = Float(0);
|
||
|
} __attribute__ ((packed));
|
||
|
|
||
|
class MandelbrotState
|
||
|
{
|
||
|
public:
|
||
|
// Initializes data and spawns the calculation thread.
|
||
|
MandelbrotState();
|
||
|
// Joins threads.
|
||
|
~MandelbrotState();
|
||
|
|
||
|
// Prepares to use the given OpenCL kernel for calculations.
|
||
|
void initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname);
|
||
|
|
||
|
Float zoom() const;
|
||
|
|
||
|
// Offsets the view's origin by the given Complex, and changes zoom by the given factor.
|
||
|
// Returns true if a new calculation has been scheduled (false if one is in progress).
|
||
|
bool moveOriginAndZoomBy(Complex c, Float z);
|
||
|
|
||
|
// Outputs the results of the latest calculation into the given SDL texture.
|
||
|
// Returns true if successful.
|
||
|
// Returns false if a calculation is in progress. The texture is not updated.
|
||
|
bool intoTexture(SDL_Texture *texture);
|
||
|
// Requests the initiation of a new calculation.
|
||
|
void scheduleRecalculation();
|
||
|
|
||
|
private:
|
||
|
std::thread m_calc_thread;
|
||
|
std::atomic_bool m_calcing; // If false, we're ready for a new calculation.
|
||
|
std::atomic_flag m_recalc; // Tell calcThread to recalc, or tell main thread recalcing is done.
|
||
|
uint32_t m_max_iterations;
|
||
|
Float m_zoom;
|
||
|
Complex m_origin;
|
||
|
|
||
|
std::unique_ptr<cl::Kernel> m_cl_kernel;
|
||
|
std::unique_ptr<cl::CommandQueue> m_cl_queue;
|
||
|
std::unique_ptr<cl::Buffer> m_cl_input;
|
||
|
std::unique_ptr<cl::Buffer> m_cl_output;
|
||
|
|
||
|
// Enters main loop of calcThread.
|
||
|
void calcThread();
|
||
|
// Calls the OpenCL kernel to compute new results.
|
||
|
void calculateBitmap();
|
||
|
|
||
|
// Determine the max iteration count based on zoom factor.
|
||
|
static uint32_t calculateMaxIterations(Float zoom);
|
||
|
};
|
||
|
|
||
|
static bool done = false;
|
||
|
static std::atomic_int fps = 0;
|
||
|
static std::chrono::time_point<std::chrono::high_resolution_clock> clTime;
|
||
|
|
||
|
static cl::Context initCLContext();
|
||
|
static cl::Program initCLProgram(cl::Context&, const char * const);
|
||
|
static void initSDL(SDL_Window **, SDL_Renderer **, SDL_Texture **);
|
||
|
static void threadFpsMonitor(MandelbrotState&);
|
||
|
static void threadEventMonitor(MandelbrotState&);
|
||
|
|
||
|
int main(int argc, char **argv)
|
||
|
{
|
||
|
MandelbrotState Mandelbrot;
|
||
|
SDL_Window *window;
|
||
|
SDL_Renderer *renderer;
|
||
|
SDL_Texture *MandelbrotTexture;
|
||
|
|
||
|
initSDL(&window, &renderer, &MandelbrotTexture);
|
||
|
|
||
|
std::ifstream clSource ("opencl/mandelbrot_calc_r128.c");
|
||
|
if (!clSource.good())
|
||
|
throw std::runtime_error("Failed to open OpenCL kernel!");
|
||
|
|
||
|
// Dump OpenCL kernel into a std::string.
|
||
|
std::ostringstream oss;
|
||
|
oss << clSource.rdbuf();
|
||
|
std::string clSourceStr (oss.str());
|
||
|
|
||
|
auto clContext = initCLContext();
|
||
|
auto clProgram = initCLProgram(clContext, clSourceStr.data());
|
||
|
Mandelbrot.initKernel(clContext, clProgram, "mandelbrot_calc");
|
||
|
|
||
|
// Initiate first calculation so something appears on the screen.
|
||
|
Mandelbrot.scheduleRecalculation();
|
||
|
|
||
|
std::thread fpsMonitor ([&Mandelbrot] { threadFpsMonitor(Mandelbrot); });
|
||
|
std::thread eventMonitor ([&Mandelbrot] { threadEventMonitor(Mandelbrot); });
|
||
|
|
||
|
#ifdef BENCHMARK
|
||
|
auto start = std::chrono::high_resolution_clock::now();
|
||
|
#endif
|
||
|
|
||
|
while (!done) {
|
||
|
if (Mandelbrot.intoTexture(MandelbrotTexture)) {
|
||
|
SDL_RenderClear(renderer);
|
||
|
SDL_RenderCopy(renderer, MandelbrotTexture, nullptr, nullptr);
|
||
|
SDL_RenderPresent(renderer);
|
||
|
|
||
|
++fps;
|
||
|
} else {
|
||
|
std::this_thread::sleep_for(std::chrono::microseconds(10));
|
||
|
}
|
||
|
|
||
|
#ifdef BENCHMARK
|
||
|
if (Mandelbrot.zoom() < Float(1e-5))
|
||
|
done = true;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
#ifdef BENCHMARK
|
||
|
std::chrono::duration<double> seconds = std::chrono::high_resolution_clock::now() - start;
|
||
|
std::cout << "Calculations took: " << seconds.count() << "s" << std::endl;
|
||
|
#endif
|
||
|
|
||
|
eventMonitor.join();
|
||
|
fpsMonitor.join();
|
||
|
SDL_DestroyRenderer(renderer);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
static cl::Platform clplatform;
|
||
|
static std::vector<cl::Device> cldevices;
|
||
|
|
||
|
cl::Context initCLContext()
|
||
|
{
|
||
|
clplatform = cl::Platform::getDefault();
|
||
|
clplatform.getDevices(CL_DEVICE_TYPE_GPU, &cldevices);
|
||
|
|
||
|
return cl::Context(cldevices.front());
|
||
|
}
|
||
|
|
||
|
cl::Program initCLProgram(cl::Context& clcontext, const char * const source)
|
||
|
{
|
||
|
cl::Program *prog;
|
||
|
|
||
|
try {
|
||
|
prog = new cl::Program(clcontext, source);
|
||
|
prog->build();
|
||
|
return *prog;
|
||
|
} catch (const cl::Error& err) {
|
||
|
const auto& dev = cldevices.front();
|
||
|
std::cout << "Build Log: " << prog->getBuildInfo<CL_PROGRAM_BUILD_LOG>(dev) << std::endl;
|
||
|
throw err;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void initSDL(SDL_Window **window, SDL_Renderer **renderer, SDL_Texture **texture)
|
||
|
{
|
||
|
/* Enable standard application logging */
|
||
|
SDL_LogSetPriority(SDL_LOG_CATEGORY_APPLICATION, SDL_LOG_PRIORITY_INFO);
|
||
|
|
||
|
if (SDL_Init(SDL_INIT_VIDEO) < 0) {
|
||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't initialize SDL: %s\n", SDL_GetError());
|
||
|
throw std::runtime_error("initSDL failed!");
|
||
|
}
|
||
|
|
||
|
*window = SDL_CreateWindow("Happy Mandelbrot",
|
||
|
SDL_WINDOWPOS_UNDEFINED,
|
||
|
SDL_WINDOWPOS_UNDEFINED,
|
||
|
WIN_DIM, WIN_DIM,
|
||
|
SDL_WINDOW_RESIZABLE);
|
||
|
if (!*window) {
|
||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't set create window: %s\n", SDL_GetError());
|
||
|
throw std::runtime_error("initSDL failed!");
|
||
|
}
|
||
|
|
||
|
*renderer = SDL_CreateRenderer(*window, -1, 0);
|
||
|
if (!*renderer) {
|
||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't set create renderer: %s\n", SDL_GetError());
|
||
|
throw std::runtime_error("initSDL failed!");
|
||
|
}
|
||
|
|
||
|
SDL_GL_SetSwapInterval(0);
|
||
|
|
||
|
*texture = SDL_CreateTexture(*renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, WIN_DIM, WIN_DIM);
|
||
|
if (!*texture) {
|
||
|
SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't set create texture: %s\n", SDL_GetError());
|
||
|
throw std::runtime_error("initSDL failed!");
|
||
|
}
|
||
|
|
||
|
atexit(SDL_Quit);
|
||
|
}
|
||
|
|
||
|
void threadFpsMonitor(MandelbrotState& Mandelbrot)
|
||
|
{
|
||
|
while (!done) {
|
||
|
std::cout << "Rendered FPS: " << fps.load() << ", Z: " << (double)Mandelbrot.zoom() << std::endl;
|
||
|
fps.store(0);
|
||
|
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void threadEventMonitor(MandelbrotState& Mandelbrot)
|
||
|
{
|
||
|
Float zfactor (1.03);
|
||
|
Float zooming (1);
|
||
|
Complex newoffset;
|
||
|
|
||
|
while (!done) {
|
||
|
auto next = std::chrono::steady_clock::now() + std::chrono::milliseconds(17);
|
||
|
|
||
|
for (SDL_Event event; SDL_PollEvent(&event);) {
|
||
|
switch (event.type) {
|
||
|
case SDL_KEYDOWN:
|
||
|
if (event.key.keysym.sym == SDLK_ESCAPE)
|
||
|
done = true;
|
||
|
break;
|
||
|
case SDL_MOUSEBUTTONDOWN:
|
||
|
// Calculate desired "normal" change from origin. -0.5 to 0.5.
|
||
|
// Zoom scales this result later.
|
||
|
newoffset = Complex {
|
||
|
Float(event.button.x / (double)WIN_DIM) - Float(0.5),
|
||
|
Float(event.button.y / (double)WIN_DIM) - Float(0.5)
|
||
|
};
|
||
|
|
||
|
// Zoom in with left button, zoom out with right.
|
||
|
if (event.button.button == SDL_BUTTON_LEFT)
|
||
|
zooming *= Float(1) - (zfactor - Float(1));
|
||
|
else if (event.button.button == SDL_BUTTON_RIGHT)
|
||
|
zooming = zfactor;
|
||
|
break;
|
||
|
case SDL_MOUSEBUTTONUP:
|
||
|
// Stop moving.
|
||
|
zooming = 1;
|
||
|
newoffset.real = 0;
|
||
|
newoffset.imag = 0;
|
||
|
break;
|
||
|
case SDL_MOUSEMOTION:
|
||
|
// Update offset on mouse movement, so zoom continues towards where the user expects.
|
||
|
if (zooming != Float(1)) {
|
||
|
newoffset.real += Float(event.motion.xrel / (double)WIN_DIM);
|
||
|
newoffset.imag += Float(event.motion.yrel / (double)WIN_DIM);
|
||
|
}
|
||
|
break;
|
||
|
case SDL_MOUSEWHEEL:
|
||
|
// Increase zoom factor with scroll up, or decrease with scroll down.
|
||
|
zfactor = std::max(Float(1), zfactor + Float(0.005) * Float(event.wheel.y));
|
||
|
|
||
|
// Update zoom speed if zfactor is changed while zooming.
|
||
|
if (zooming != Float(1)) {
|
||
|
if (zooming < Float(1))
|
||
|
zooming *= Float(1) - (zfactor - Float(1));
|
||
|
else
|
||
|
zooming = zfactor;
|
||
|
}
|
||
|
break;
|
||
|
case SDL_QUIT:
|
||
|
done = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#ifdef BENCHMARK
|
||
|
// Constant zoom on a constant point.
|
||
|
bool b = Mandelbrot.moveOriginAndZoomBy({}, Float(1) - (zfactor - Float(1)));
|
||
|
std::this_thread::sleep_for(std::chrono::milliseconds(b ? 8 : 1)); // max 125fps
|
||
|
#else
|
||
|
if (zooming != Float(1) || newoffset.real != Float(0) || newoffset.imag != Float(0)) {
|
||
|
// Scale new offset according to zoom.
|
||
|
const auto zoom = Mandelbrot.zoom();
|
||
|
Complex c = newoffset;
|
||
|
c.real *= zoom * (Float(1) - zooming);
|
||
|
c.imag *= zoom * (Float(1) - zooming);
|
||
|
|
||
|
// Don't sleep as long if a recalculation is already running, so
|
||
|
// we can get our new request in sooner once recalculation completes.
|
||
|
if (Mandelbrot.moveOriginAndZoomBy(c, zooming))
|
||
|
std::this_thread::sleep_until(next);
|
||
|
else
|
||
|
std::this_thread::sleep_for(std::chrono::microseconds(50));
|
||
|
} else {
|
||
|
std::this_thread::sleep_until(next);
|
||
|
}
|
||
|
#endif
|
||
|
}
|
||
|
}
|
||
|
|
||
|
MandelbrotState::MandelbrotState():
|
||
|
m_calcing(false),
|
||
|
m_max_iterations(MIN_MAX_ITERATIONS),
|
||
|
m_zoom(MIN_ZOOM)
|
||
|
{
|
||
|
#ifndef BENCHMARK
|
||
|
// This is a good starting point.
|
||
|
m_origin.real = -1;
|
||
|
m_origin.imag = 0;
|
||
|
#else
|
||
|
m_origin.real = -1.5;
|
||
|
m_origin.imag = 0;
|
||
|
#endif
|
||
|
|
||
|
// Spawn calcThread to begin receiving recalculation requests.
|
||
|
m_recalc.clear();
|
||
|
m_calc_thread = std::thread([this] { calcThread(); });
|
||
|
}
|
||
|
|
||
|
MandelbrotState::~MandelbrotState() {
|
||
|
// calcThread is likely waiting for m_recalc to become true.
|
||
|
m_recalc.test_and_set();
|
||
|
m_recalc.notify_all();
|
||
|
|
||
|
// Bring calcThread in if it's still running.
|
||
|
if (m_calc_thread.joinable())
|
||
|
m_calc_thread.join();
|
||
|
}
|
||
|
|
||
|
void MandelbrotState::initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname)
|
||
|
{
|
||
|
m_cl_kernel.reset(new cl::Kernel(clprogram, "mandelbrot_calc"));
|
||
|
m_cl_queue.reset(new cl::CommandQueue(clcontext));
|
||
|
m_cl_input.reset(new cl::Buffer(clcontext, CL_MEM_READ_ONLY, WIN_DIM * WIN_DIM * sizeof(Complex)));
|
||
|
m_cl_output.reset(new cl::Buffer(clcontext, CL_MEM_WRITE_ONLY, WIN_DIM * WIN_DIM * sizeof(uint32_t)));
|
||
|
|
||
|
// These kernel parameters do not change throughout execution.
|
||
|
// Max iteration count does, and is set with each kernel execution.
|
||
|
m_cl_kernel->setArg(0, *m_cl_input);
|
||
|
m_cl_kernel->setArg(1, *m_cl_output);
|
||
|
}
|
||
|
|
||
|
Float MandelbrotState::zoom() const {
|
||
|
return m_zoom;
|
||
|
}
|
||
|
|
||
|
bool MandelbrotState::moveOriginAndZoomBy(Complex c, Float z) {
|
||
|
if (!m_calcing) {
|
||
|
m_origin.real += c.real;
|
||
|
m_origin.imag += c.imag;
|
||
|
m_zoom = std::min(MIN_ZOOM, m_zoom * z);
|
||
|
m_max_iterations = std::max(MIN_MAX_ITERATIONS, calculateMaxIterations(m_zoom));
|
||
|
|
||
|
scheduleRecalculation();
|
||
|
}
|
||
|
|
||
|
return !m_calcing;
|
||
|
}
|
||
|
|
||
|
bool MandelbrotState::intoTexture(SDL_Texture *texture) {
|
||
|
if (m_calcing) {
|
||
|
// Wait for the calculations to complete.
|
||
|
m_recalc.wait(true);
|
||
|
|
||
|
// Lock the SDL texture, then stream the OpenCL output into it.
|
||
|
void *dst;
|
||
|
int pitch;
|
||
|
SDL_LockTexture(texture, nullptr, &dst, &pitch);
|
||
|
m_cl_queue->enqueueReadBuffer(*m_cl_output, CL_TRUE, 0, WIN_DIM * WIN_DIM * sizeof(uint32_t), dst);
|
||
|
SDL_UnlockTexture(texture);
|
||
|
|
||
|
std::chrono::duration<double> diff =
|
||
|
std::chrono::high_resolution_clock::now() - clTime;
|
||
|
std::cout << "Time: " << diff.count() << "s" << std::endl;
|
||
|
|
||
|
// Allow user input to modify origin and zoom,
|
||
|
// also allowing the next calculation to be scheduled.
|
||
|
m_calcing = false;
|
||
|
return true;
|
||
|
} else {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void MandelbrotState::scheduleRecalculation() {
|
||
|
if (!m_calcing) {
|
||
|
// Tell calcThread that it's time to recalculate.
|
||
|
m_recalc.test_and_set();
|
||
|
m_recalc.notify_one();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void MandelbrotState::calcThread() {
|
||
|
while (!done) {
|
||
|
// Wait for a recalculation to be requested, indicated by m_recalc becoming true.
|
||
|
m_recalc.wait(false);
|
||
|
calculateBitmap();
|
||
|
|
||
|
// Finished. Clear m_recalc, and notify the render thread (checked at MandelbrotState::intoTexture).
|
||
|
m_recalc.clear();
|
||
|
m_recalc.notify_one();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
uint32_t MandelbrotState::calculateMaxIterations(Float zoom)
|
||
|
{
|
||
|
// The max iteration count will increase linearly with zoom factor.
|
||
|
// TODO Does the result increase too quickly?
|
||
|
|
||
|
return MIN_MAX_ITERATIONS * (1.5 - std::log(static_cast<double>(zoom)) / std::log((double)MIN_ZOOM));
|
||
|
}
|
||
|
|
||
|
void MandelbrotState::calculateBitmap()
|
||
|
{
|
||
|
static std::array<Complex, WIN_DIM * WIN_DIM> points;
|
||
|
static std::array<Float, WIN_DIM> row;
|
||
|
static std::array<Float, WIN_DIM> col;
|
||
|
|
||
|
//
|
||
|
// Generate a list of every Complex coordinate that needs to be calculated.
|
||
|
|
||
|
const Float dz (m_zoom * Float(1.0 / WIN_DIM));
|
||
|
Complex pt;
|
||
|
pt.real = m_origin.real - m_zoom * Float(0.5);
|
||
|
pt.imag = m_origin.imag - m_zoom * Float(0.5);
|
||
|
|
||
|
{
|
||
|
auto p = row.begin();
|
||
|
Float r = pt.real;
|
||
|
for (int i = 0; i < WIN_DIM; ++i) {
|
||
|
*p++ = r;
|
||
|
r += dz;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
{
|
||
|
auto p = col.begin();
|
||
|
Float r = pt.imag;
|
||
|
for (int i = 0; i < WIN_DIM; ++i) {
|
||
|
*p++ = r;
|
||
|
r += dz;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
auto ptr = points.begin();
|
||
|
for (int j = 0; j < WIN_DIM; ++j) {
|
||
|
Complex c;
|
||
|
c.imag = col[j];
|
||
|
|
||
|
for (int i = 0; i < WIN_DIM; ++i) {
|
||
|
c.real = row[i];
|
||
|
*ptr++ = c;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Pass the list into the OpenCL kernel, and begin execution.
|
||
|
|
||
|
while (m_calcing)
|
||
|
std::this_thread::yield();
|
||
|
|
||
|
m_calcing = true;
|
||
|
|
||
|
clTime = std::chrono::high_resolution_clock::now();
|
||
|
m_cl_kernel->setArg(2, m_max_iterations);
|
||
|
m_cl_queue->enqueueWriteBuffer(*m_cl_input, CL_TRUE, 0, points.size() * sizeof(Complex), points.data());
|
||
|
m_cl_queue->enqueueNDRangeKernel(*m_cl_kernel, cl::NullRange, cl::NDRange(points.size()), cl::NullRange);
|
||
|
}
|
||
|
|