add NO_OPENCL option

it's slower
master
Clyne 2 years ago
parent abaea884db
commit 80c62fa29b

@ -1,41 +1,62 @@
/**
* happy-fractal - A study of efficient and precise fractal rendering.
* Copyright (C) 2022 Clyne Sullivan
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// fractal - OpenCL-accelerated Mandelbrot renderer.
// Written by Clyne Sullivan.
// If defined, program auto-zooms and measures runtime.
//#define BENCHMARK
// If defined, split calculations across CPU threads instead of using OpenCL.
//#define NO_OPENCL
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstring>
#include <execution>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <ranges>
#include <sstream>
#include <stdexcept>
#include <thread>
#include <vector>
#include <SDL2/SDL.h>
#ifndef NO_OPENCL
// Include OpenCL libraries if they're required.
#define CL_HPP_TARGET_OPENCL_VERSION (300)
#define CL_HPP_ENABLE_EXCEPTIONS (1)
#include <CL/opencl.hpp>
#include <SDL2/SDL.h>
#else
// Define helper types and functions to allow direct inclusion of the kernel.
#include <map>
struct ulong2 {
uint64_t lo;
uint64_t hi;
} __attribute__ ((packed));
struct ulong4 {
ulong2 lo;
ulong2 hi;
} __attribute__ ((packed));
#define __kernel
#define __global
#define get_global_id(x) (globalIds[std::this_thread::get_id()])
static std::map<std::thread::id, unsigned int> globalIds;
static std::array<uint32_t, WIN_DIM * WIN_DIM> renderOutput;
#include "opencl/mandelbrot_calc_r128.c"
#endif
// Sets the window's dimensions. The window is square.
constexpr static int WIN_DIM = 800;
// For non-OpenCL rendering, the number of threads to split work across.
constexpr static int THREAD_COUNT = 8;
// The "Float" type determines what data type will store numbers for calculations.
// Can use native float or double; or, a custom Q4.124 fixed-point data type.
@ -47,9 +68,6 @@ using Float = R128;
//using Float = double;
// Sets the window's dimensions. The window is square.
constexpr static int WIN_DIM = 800;
// Not allowed to calculate less iterations than this.
constexpr uint32_t MIN_MAX_ITERATIONS = 70;
// Not allowed to zoom out farther than this.
@ -72,8 +90,10 @@ public:
// Joins threads.
~MandelbrotState();
#ifndef NO_OPENCL
// Prepares to use the given OpenCL kernel for calculations.
void initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname);
#endif
Float zoom() const;
@ -96,10 +116,12 @@ private:
Float m_zoom;
Complex m_origin;
#ifndef NO_OPENCL
std::unique_ptr<cl::Kernel> m_cl_kernel;
std::unique_ptr<cl::CommandQueue> m_cl_queue;
std::unique_ptr<cl::Buffer> m_cl_input;
std::unique_ptr<cl::Buffer> m_cl_output;
#endif
// Enters main loop of calcThread.
void calcThread();
@ -114,8 +136,10 @@ static bool done = false;
static std::atomic_int fps = 0;
static std::chrono::time_point<std::chrono::high_resolution_clock> clTime;
#ifndef NO_OPENCL
static cl::Context initCLContext();
static cl::Program initCLProgram(cl::Context&, const char * const);
#endif
static void initSDL(SDL_Window **, SDL_Renderer **, SDL_Texture **);
static void threadFpsMonitor(MandelbrotState&);
static void threadEventMonitor(MandelbrotState&);
@ -129,6 +153,7 @@ int main(int argc, char **argv)
initSDL(&window, &renderer, &MandelbrotTexture);
#ifndef NO_OPENCL
std::ifstream clSource ("opencl/mandelbrot_calc_r128.c");
if (!clSource.good())
throw std::runtime_error("Failed to open OpenCL kernel!");
@ -141,6 +166,7 @@ int main(int argc, char **argv)
auto clContext = initCLContext();
auto clProgram = initCLProgram(clContext, clSourceStr.data());
Mandelbrot.initKernel(clContext, clProgram, "mandelbrot_calc");
#endif
// Initiate first calculation so something appears on the screen.
Mandelbrot.scheduleRecalculation();
@ -180,7 +206,7 @@ int main(int argc, char **argv)
return 0;
}
#ifndef NO_OPENCL
static cl::Platform clplatform;
static std::vector<cl::Device> cldevices;
@ -206,6 +232,7 @@ cl::Program initCLProgram(cl::Context& clcontext, const char * const source)
throw err;
}
}
#endif // NO_OPENCL
void initSDL(SDL_Window **window, SDL_Renderer **renderer, SDL_Texture **texture)
{
@ -367,6 +394,7 @@ MandelbrotState::~MandelbrotState() {
m_calc_thread.join();
}
#ifndef NO_OPENCL
void MandelbrotState::initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname)
{
m_cl_kernel.reset(new cl::Kernel(clprogram, "mandelbrot_calc"));
@ -379,6 +407,7 @@ void MandelbrotState::initKernel(cl::Context& clcontext, cl::Program& clprogram,
m_cl_kernel->setArg(0, *m_cl_input);
m_cl_kernel->setArg(1, *m_cl_output);
}
#endif // NO_OPENCL
Float MandelbrotState::zoom() const {
return m_zoom;
@ -406,7 +435,11 @@ bool MandelbrotState::intoTexture(SDL_Texture *texture) {
void *dst;
int pitch;
SDL_LockTexture(texture, nullptr, &dst, &pitch);
#ifdef NO_OPENCL
std::memcpy(dst, renderOutput.data(), renderOutput.size() * sizeof(uint32_t));
#else
m_cl_queue->enqueueReadBuffer(*m_cl_output, CL_TRUE, 0, WIN_DIM * WIN_DIM * sizeof(uint32_t), dst);
#endif
SDL_UnlockTexture(texture);
std::chrono::duration<double> diff =
@ -502,8 +535,23 @@ void MandelbrotState::calculateBitmap()
m_calcing = true;
clTime = std::chrono::high_resolution_clock::now();
#ifdef NO_OPENCL
std::vector<std::thread> execs;
for (int t = 0; t < THREAD_COUNT; ++t) {
execs.emplace_back([this, t] {
for (size_t i = t * renderOutput.size() / THREAD_COUNT; i < (t + 1) * renderOutput.size() / THREAD_COUNT; ++i) {
globalIds.insert_or_assign(std::this_thread::get_id(), i);
mandelbrot_calc((ulong4 *)points.data(), renderOutput.data(), m_max_iterations);
}});
}
for (auto& t : execs)
t.join();
#else
m_cl_kernel->setArg(2, m_max_iterations);
m_cl_queue->enqueueWriteBuffer(*m_cl_input, CL_TRUE, 0, points.size() * sizeof(Complex), points.data());
m_cl_queue->enqueueNDRangeKernel(*m_cl_kernel, cl::NullRange, cl::NDRange(points.size()), cl::NullRange);
#endif
}

Loading…
Cancel
Save