From 80c62fa29bbc82a4db156b08f60a58b696b1421c Mon Sep 17 00:00:00 2001 From: Clyne Sullivan Date: Sat, 23 Jul 2022 17:32:45 -0400 Subject: [PATCH] add NO_OPENCL option it's slower --- main.cpp | 92 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/main.cpp b/main.cpp index 643901c..c2530f2 100644 --- a/main.cpp +++ b/main.cpp @@ -1,41 +1,62 @@ -/** - * happy-fractal - A study of efficient and precise fractal rendering. - * Copyright (C) 2022 Clyne Sullivan - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ +// fractal - OpenCL-accelerated Mandelbrot renderer. +// Written by Clyne Sullivan. // If defined, program auto-zooms and measures runtime. //#define BENCHMARK +// If defined, split calculations across CPU threads instead of using OpenCL. +//#define NO_OPENCL + #include #include #include #include +#include #include #include #include #include +#include #include #include #include #include +#include + +#ifndef NO_OPENCL +// Include OpenCL libraries if they're required. #define CL_HPP_TARGET_OPENCL_VERSION (300) #define CL_HPP_ENABLE_EXCEPTIONS (1) #include -#include +#else +// Define helper types and functions to allow direct inclusion of the kernel.
+#include + +struct ulong2 { + uint64_t lo; + uint64_t hi; +} __attribute__ ((packed)); +struct ulong4 { + ulong2 lo; + ulong2 hi; +} __attribute__ ((packed)); + +#define __kernel +#define __global +#define get_global_id(x) (globalIds[std::this_thread::get_id()]) + +static std::map globalIds; +static std::array renderOutput; + +#include "opencl/mandelbrot_calc_r128.c" +#endif + +// Sets the window's dimensions. The window is square. +constexpr static int WIN_DIM = 800; + +// For non-OpenCL rendering, the number of threads to split work across. +constexpr static int THREAD_COUNT = 8; // The "Float" type determines what data type will store numbers for calculations. // Can use native float or double; or, a custom Q4.124 fixed-point data type. @@ -47,9 +68,6 @@ using Float = R128; //using Float = double; -// Sets the window's dimensions. The window is square. -constexpr static int WIN_DIM = 800; - // Not allowed to calculate less iterations than this. constexpr uint32_t MIN_MAX_ITERATIONS = 70; // Not allowed to zoom out farther than this. @@ -72,8 +90,10 @@ public: // Joins threads. ~MandelbrotState(); +#ifndef NO_OPENCL // Prepares to use the given OpenCL kernel for calculations. void initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname); +#endif Float zoom() const; @@ -96,10 +116,12 @@ private: Float m_zoom; Complex m_origin; +#ifndef NO_OPENCL std::unique_ptr m_cl_kernel; std::unique_ptr m_cl_queue; std::unique_ptr m_cl_input; std::unique_ptr m_cl_output; +#endif // Enters main loop of calcThread. 
void calcThread(); @@ -114,8 +136,10 @@ static bool done = false; static std::atomic_int fps = 0; static std::chrono::time_point clTime; +#ifndef NO_OPENCL static cl::Context initCLContext(); static cl::Program initCLProgram(cl::Context&, const char * const); +#endif static void initSDL(SDL_Window **, SDL_Renderer **, SDL_Texture **); static void threadFpsMonitor(MandelbrotState&); static void threadEventMonitor(MandelbrotState&); @@ -129,6 +153,7 @@ int main(int argc, char **argv) initSDL(&window, &renderer, &MandelbrotTexture); +#ifndef NO_OPENCL std::ifstream clSource ("opencl/mandelbrot_calc_r128.c"); if (!clSource.good()) throw std::runtime_error("Failed to open OpenCL kernel!"); @@ -141,6 +166,7 @@ int main(int argc, char **argv) auto clContext = initCLContext(); auto clProgram = initCLProgram(clContext, clSourceStr.data()); Mandelbrot.initKernel(clContext, clProgram, "mandelbrot_calc"); +#endif // Initiate first calculation so something appears on the screen. Mandelbrot.scheduleRecalculation(); @@ -180,7 +206,7 @@ int main(int argc, char **argv) return 0; } - +#ifndef NO_OPENCL static cl::Platform clplatform; static std::vector cldevices; @@ -206,6 +232,7 @@ cl::Program initCLProgram(cl::Context& clcontext, const char * const source) throw err; } } +#endif // NO_OPENCL void initSDL(SDL_Window **window, SDL_Renderer **renderer, SDL_Texture **texture) { @@ -367,6 +394,7 @@ MandelbrotState::~MandelbrotState() { m_calc_thread.join(); } +#ifndef NO_OPENCL void MandelbrotState::initKernel(cl::Context& clcontext, cl::Program& clprogram, const char *kernelname) { m_cl_kernel.reset(new cl::Kernel(clprogram, "mandelbrot_calc")); @@ -379,6 +407,7 @@ void MandelbrotState::initKernel(cl::Context& clcontext, cl::Program& clprogram, m_cl_kernel->setArg(0, *m_cl_input); m_cl_kernel->setArg(1, *m_cl_output); } +#endif // NO_OPENCL Float MandelbrotState::zoom() const { return m_zoom; @@ -406,7 +435,11 @@ bool MandelbrotState::intoTexture(SDL_Texture *texture) { void *dst; int 
pitch; SDL_LockTexture(texture, nullptr, &dst, &pitch); +#ifdef NO_OPENCL + std::memcpy(dst, renderOutput.data(), renderOutput.size() * sizeof(uint32_t)); +#else m_cl_queue->enqueueReadBuffer(*m_cl_output, CL_TRUE, 0, WIN_DIM * WIN_DIM * sizeof(uint32_t), dst); +#endif SDL_UnlockTexture(texture); std::chrono::duration diff = @@ -502,8 +535,23 @@ void MandelbrotState::calculateBitmap() m_calcing = true; clTime = std::chrono::high_resolution_clock::now(); +#ifdef NO_OPENCL + std::vector execs; + + for (int t = 0; t < THREAD_COUNT; ++t) { + execs.emplace_back([this, t] { + for (size_t i = t * renderOutput.size() / THREAD_COUNT; i < (t + 1) * renderOutput.size() / THREAD_COUNT; ++i) { + globalIds.insert_or_assign(std::this_thread::get_id(), i); + mandelbrot_calc((ulong4 *)points.data(), renderOutput.data(), m_max_iterations); + }}); + } + + for (auto& t : execs) + t.join(); +#else m_cl_kernel->setArg(2, m_max_iterations); m_cl_queue->enqueueWriteBuffer(*m_cl_input, CL_TRUE, 0, points.size() * sizeof(Complex), points.data()); m_cl_queue->enqueueNDRangeKernel(*m_cl_kernel, cl::NullRange, cl::NDRange(points.size()), cl::NullRange); +#endif }