add NO_OPENCL option

it's slower
master
Clyne 2 years ago
parent abaea884db
commit 80c62fa29b

@ -1,41 +1,62 @@
/** // fractal - OpenCL-accelerated Mandelbrot renderer.
* happy-fractal - A study of efficient and precise fractal rendering. // Written by Clyne Sullivan.
* Copyright (C) 2022 Clyne Sullivan
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// If defined, program auto-zooms and measures runtime. // If defined, program auto-zooms and measures runtime.
//#define BENCHMARK //#define BENCHMARK
// If defined, split calculations across CPU threads instead of using OpenCL.
//#define NO_OPENCL
#include <atomic> #include <atomic>
#include <chrono> #include <chrono>
#include <cstdint> #include <cstdint>
#include <cstring> #include <cstring>
#include <execution>
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <ranges>
#include <sstream> #include <sstream>
#include <stdexcept> #include <stdexcept>
#include <thread> #include <thread>
#include <vector> #include <vector>
#include <SDL2/SDL.h>
#ifndef NO_OPENCL
// Include OpenCL libraries if they're required.
#define CL_HPP_TARGET_OPENCL_VERSION (300) #define CL_HPP_TARGET_OPENCL_VERSION (300)
#define CL_HPP_ENABLE_EXCEPTIONS (1) #define CL_HPP_ENABLE_EXCEPTIONS (1)
#include <CL/opencl.hpp> #include <CL/opencl.hpp>
#include <SDL2/SDL.h> #else
// Define helper types and functions to allow direct inclusion of the kernel.
#include <map>
// Host-side stand-in for the kernel's ulong2 type: two 64-bit words, named
// lo and hi, stored back to back with no padding.
struct __attribute__((packed)) ulong2 {
    std::uint64_t lo;
    std::uint64_t hi;
};
// Host-side stand-in for the kernel's ulong4 type: two ulong2 halves, named
// lo and hi, stored back to back with no padding (256 bits in total).
struct __attribute__((packed)) ulong4 {
    ulong2 lo;
    ulong2 hi;
};
#define __kernel
#define __global
#define get_global_id(x) (globalIds[std::this_thread::get_id()])
static std::map<std::thread::id, unsigned int> globalIds;
static std::array<uint32_t, WIN_DIM * WIN_DIM> renderOutput;
#include "opencl/mandelbrot_calc_r128.c"
#endif
// Sets the window's dimensions. The window is square.
constexpr static int WIN_DIM = 800;
// For non-OpenCL rendering, the number of threads to split work across.
constexpr static int THREAD_COUNT = 8;
// The "Float" type determines what data type will store numbers for calculations. // The "Float" type determines what data type will store numbers for calculations.
// Can use native float or double; or, a custom Q4.124 fixed-point data type. // Can use native float or double; or, a custom Q4.124 fixed-point data type.
@ -47,9 +68,6 @@ using Float = R128;
//using Float = double; //using Float = double;
// Sets the window's dimensions. The window is square.
constexpr static int WIN_DIM = 800;
// Not allowed to calculate fewer iterations than this. // Not allowed to calculate fewer iterations than this.
constexpr uint32_t MIN_MAX_ITERATIONS = 70; constexpr uint32_t MIN_MAX_ITERATIONS = 70;
// Not allowed to zoom out farther than this. // Not allowed to zoom out farther than this.
@ -72,8 +90,10 @@ public:
// Joins threads. // Joins threads.
~MandelbrotState(); ~MandelbrotState();
#ifndef NO_OPENCL
// Prepares to use the given OpenCL kernel for calculations. // Prepares to use the given OpenCL kernel for calculations.
void initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname); void initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname);
#endif
Float zoom() const; Float zoom() const;
@ -96,10 +116,12 @@ private:
Float m_zoom; Float m_zoom;
Complex m_origin; Complex m_origin;
#ifndef NO_OPENCL
std::unique_ptr<cl::Kernel> m_cl_kernel; std::unique_ptr<cl::Kernel> m_cl_kernel;
std::unique_ptr<cl::CommandQueue> m_cl_queue; std::unique_ptr<cl::CommandQueue> m_cl_queue;
std::unique_ptr<cl::Buffer> m_cl_input; std::unique_ptr<cl::Buffer> m_cl_input;
std::unique_ptr<cl::Buffer> m_cl_output; std::unique_ptr<cl::Buffer> m_cl_output;
#endif
// Enters main loop of calcThread. // Enters main loop of calcThread.
void calcThread(); void calcThread();
@ -114,8 +136,10 @@ static bool done = false;
static std::atomic_int fps = 0; static std::atomic_int fps = 0;
static std::chrono::time_point<std::chrono::high_resolution_clock> clTime; static std::chrono::time_point<std::chrono::high_resolution_clock> clTime;
#ifndef NO_OPENCL
static cl::Context initCLContext(); static cl::Context initCLContext();
static cl::Program initCLProgram(cl::Context&, const char * const); static cl::Program initCLProgram(cl::Context&, const char * const);
#endif
static void initSDL(SDL_Window **, SDL_Renderer **, SDL_Texture **); static void initSDL(SDL_Window **, SDL_Renderer **, SDL_Texture **);
static void threadFpsMonitor(MandelbrotState&); static void threadFpsMonitor(MandelbrotState&);
static void threadEventMonitor(MandelbrotState&); static void threadEventMonitor(MandelbrotState&);
@ -129,6 +153,7 @@ int main(int argc, char **argv)
initSDL(&window, &renderer, &MandelbrotTexture); initSDL(&window, &renderer, &MandelbrotTexture);
#ifndef NO_OPENCL
std::ifstream clSource ("opencl/mandelbrot_calc_r128.c"); std::ifstream clSource ("opencl/mandelbrot_calc_r128.c");
if (!clSource.good()) if (!clSource.good())
throw std::runtime_error("Failed to open OpenCL kernel!"); throw std::runtime_error("Failed to open OpenCL kernel!");
@ -141,6 +166,7 @@ int main(int argc, char **argv)
auto clContext = initCLContext(); auto clContext = initCLContext();
auto clProgram = initCLProgram(clContext, clSourceStr.data()); auto clProgram = initCLProgram(clContext, clSourceStr.data());
Mandelbrot.initKernel(clContext, clProgram, "mandelbrot_calc"); Mandelbrot.initKernel(clContext, clProgram, "mandelbrot_calc");
#endif
// Initiate first calculation so something appears on the screen. // Initiate first calculation so something appears on the screen.
Mandelbrot.scheduleRecalculation(); Mandelbrot.scheduleRecalculation();
@ -180,7 +206,7 @@ int main(int argc, char **argv)
return 0; return 0;
} }
#ifndef NO_OPENCL
static cl::Platform clplatform; static cl::Platform clplatform;
static std::vector<cl::Device> cldevices; static std::vector<cl::Device> cldevices;
@ -206,6 +232,7 @@ cl::Program initCLProgram(cl::Context& clcontext, const char * const source)
throw err; throw err;
} }
} }
#endif // NO_OPENCL
void initSDL(SDL_Window **window, SDL_Renderer **renderer, SDL_Texture **texture) void initSDL(SDL_Window **window, SDL_Renderer **renderer, SDL_Texture **texture)
{ {
@ -367,6 +394,7 @@ MandelbrotState::~MandelbrotState() {
m_calc_thread.join(); m_calc_thread.join();
} }
#ifndef NO_OPENCL
void MandelbrotState::initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname) void MandelbrotState::initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname)
{ {
m_cl_kernel.reset(new cl::Kernel(clprogram, "mandelbrot_calc")); m_cl_kernel.reset(new cl::Kernel(clprogram, "mandelbrot_calc"));
@ -379,6 +407,7 @@ void MandelbrotState::initKernel(cl::Context& clcontext, cl::Program& clprogram,
m_cl_kernel->setArg(0, *m_cl_input); m_cl_kernel->setArg(0, *m_cl_input);
m_cl_kernel->setArg(1, *m_cl_output); m_cl_kernel->setArg(1, *m_cl_output);
} }
#endif // NO_OPENCL
Float MandelbrotState::zoom() const { Float MandelbrotState::zoom() const {
return m_zoom; return m_zoom;
@ -406,7 +435,11 @@ bool MandelbrotState::intoTexture(SDL_Texture *texture) {
void *dst; void *dst;
int pitch; int pitch;
SDL_LockTexture(texture, nullptr, &dst, &pitch); SDL_LockTexture(texture, nullptr, &dst, &pitch);
#ifdef NO_OPENCL
std::memcpy(dst, renderOutput.data(), renderOutput.size() * sizeof(uint32_t));
#else
m_cl_queue->enqueueReadBuffer(*m_cl_output, CL_TRUE, 0, WIN_DIM * WIN_DIM * sizeof(uint32_t), dst); m_cl_queue->enqueueReadBuffer(*m_cl_output, CL_TRUE, 0, WIN_DIM * WIN_DIM * sizeof(uint32_t), dst);
#endif
SDL_UnlockTexture(texture); SDL_UnlockTexture(texture);
std::chrono::duration<double> diff = std::chrono::duration<double> diff =
@ -502,8 +535,23 @@ void MandelbrotState::calculateBitmap()
m_calcing = true; m_calcing = true;
clTime = std::chrono::high_resolution_clock::now(); clTime = std::chrono::high_resolution_clock::now();
#ifdef NO_OPENCL
std::vector<std::thread> execs;
for (int t = 0; t < THREAD_COUNT; ++t) {
execs.emplace_back([this, t] {
for (size_t i = t * renderOutput.size() / THREAD_COUNT; i < (t + 1) * renderOutput.size() / THREAD_COUNT; ++i) {
globalIds.insert_or_assign(std::this_thread::get_id(), i);
mandelbrot_calc((ulong4 *)points.data(), renderOutput.data(), m_max_iterations);
}});
}
for (auto& t : execs)
t.join();
#else
m_cl_kernel->setArg(2, m_max_iterations); m_cl_kernel->setArg(2, m_max_iterations);
m_cl_queue->enqueueWriteBuffer(*m_cl_input, CL_TRUE, 0, points.size() * sizeof(Complex), points.data()); m_cl_queue->enqueueWriteBuffer(*m_cl_input, CL_TRUE, 0, points.size() * sizeof(Complex), points.data());
m_cl_queue->enqueueNDRangeKernel(*m_cl_kernel, cl::NullRange, cl::NDRange(points.size()), cl::NullRange); m_cl_queue->enqueueNDRangeKernel(*m_cl_kernel, cl::NullRange, cl::NDRange(points.size()), cl::NullRange);
#endif
} }

Loading…
Cancel
Save