aboutsummaryrefslogtreecommitdiffstats
path: root/opencl/mandelbrot_calc_r128.c
diff options
context:
space:
mode:
Diffstat (limited to 'opencl/mandelbrot_calc_r128.c')
-rw-r--r--opencl/mandelbrot_calc_r128.c156
1 files changed, 156 insertions, 0 deletions
diff --git a/opencl/mandelbrot_calc_r128.c b/opencl/mandelbrot_calc_r128.c
new file mode 100644
index 0000000..35df1e2
--- /dev/null
+++ b/opencl/mandelbrot_calc_r128.c
@@ -0,0 +1,156 @@
+inline ulong2 r128Add(const ulong2 a, const ulong2 b)
+{
+ ulong2 dst;
+ dst.lo = a.lo + b.lo;
+ dst.hi = a.hi + b.hi + (dst.lo < a.lo);
+ return dst;
+}
+
+inline ulong2 r128Sub(const ulong2 a, const ulong2 b)
+{
+ ulong2 dst;
+ dst.lo = a.lo - b.lo;
+ dst.hi = a.hi - b.hi - (dst.lo > a.lo);
+ return dst;
+}
+
+inline ulong2 r128__umul128(ulong a, ulong b)
+{
+ ulong alo = a & 0xFFFFFFFF;
+ ulong ahi = a >> 32;
+ ulong blo = b & 0xFFFFFFFF;
+ ulong bhi = b >> 32;
+ ulong p0, p1, p2, p3;
+
+ p0 = alo * blo;
+ p1 = alo * bhi;
+ p2 = ahi * blo;
+ p3 = ahi * bhi;
+
+ ulong2 dst;
+ ulong carry;
+ carry = ((p1 & 0xFFFFFFFF) + (p2 & 0xFFFFFFFF) + (p0 >> 32)) >> 32;
+
+ dst.lo = p0 + ((p1 + p2) << 32);
+ dst.hi = p3 + ((uint)(p1 >> 32) + (uint)(p2 >> 32)) + carry;
+ return dst;
+}
+
+inline ulong r128__umul128Hi(ulong a, ulong b)
+{
+ ulong alo = a & 0xFFFFFFFF;
+ ulong ahi = a >> 32;
+ ulong blo = b & 0xFFFFFFFF;
+ ulong bhi = b >> 32;
+ ulong p0, p1, p2, p3;
+
+ p0 = alo * blo;
+ p1 = alo * bhi;
+ p2 = ahi * blo;
+ p3 = ahi * bhi;
+
+ ulong carry = ((p1 & 0xFFFFFFFF) + (p2 & 0xFFFFFFFF) + (p0 >> 32)) >> 32;
+ return p3 + ((uint)(p1 >> 32) + (uint)(p2 >> 32)) + carry;
+}
+
+inline ulong2 r128Shr(const ulong2 src, int amount)
+{
+ ulong2 r;
+ r.lo = (src.lo >> amount) | (src.hi << (64 - amount));
+ r.hi = src.hi >> amount;
+ return r;
+}
+
+inline ulong2 r128__umul(const ulong2 a, const ulong2 b)
+{
+ ulong2 ahbl, albh, ahbh, sum;
+
+ sum.lo = r128__umul128Hi(a.lo, b.lo);
+ ahbl = r128__umul128(a.hi, b.lo);
+ albh = r128__umul128(a.lo, b.hi);
+ ahbh = r128__umul128(a.hi, b.hi);
+
+ ahbh = r128Shr(ahbh, 60);
+ sum.hi = ahbh.lo;
+ sum = r128Add(sum, ahbl);
+ sum = r128Add(sum, albh);
+
+ return sum;
+}
+
+inline ulong2 r128Mul(const ulong2 a, const ulong2 b)
+{
+ int sign = 0;
+ ulong2 ta, tb, tc;
+
+ ta = a;
+ tb = b;
+
+ if ((long)ta.hi < 0) {
+ if (ta.lo) {
+ ta.lo = ~ta.lo + 1;
+ ta.hi = ~ta.hi;
+ } else {
+ ta.lo = 0;
+ ta.hi = ~ta.hi + 1;
+ }
+ sign = !sign;
+ }
+ if ((long)tb.hi < 0) {
+ if (tb.lo) {
+ tb.lo = ~tb.lo + 1;
+ tb.hi = ~tb.hi;
+ } else {
+ tb.lo = 0;
+ tb.hi = ~tb.hi + 1;
+ }
+ sign = !sign;
+ }
+
+ tc = r128__umul(ta, tb);
+
+ if (sign) {
+ if (tc.lo) {
+ tc.lo = ~tc.lo + 1;
+ tc.hi = ~tc.hi;
+ } else {
+ tc.lo = 0;
+ tc.hi = ~tc.hi + 1;
+ }
+ }
+
+ return tc;
+}
+
+__kernel void mandelbrot_calc(const __global ulong4 *c_pt,
+ __global unsigned int *out_it,
+ const unsigned int max_iterations)
+{
+ const int id = get_global_id(0);
+ const ulong4 opt = c_pt[id];
+
+ ulong4 pt = opt;
+ ulong2 tmp;
+ unsigned int iterations;
+
+ for (iterations = 0; iterations < max_iterations; ++iterations) {
+ tmp = r128Mul(pt.lo, pt.hi);
+ pt.lo = r128Mul(pt.lo, pt.lo);
+ pt.hi = r128Mul(pt.hi, pt.hi);
+
+ const ulong2 sum = r128Add(pt.lo, pt.hi);
+ if ((long)sum.hi >= 0x4000000000000000)
+ break;
+
+ pt.lo = r128Sub(pt.lo, pt.hi);
+ pt.hi = r128Add(tmp, tmp);
+ pt.lo = r128Add(pt.lo, opt.lo);
+ pt.hi = r128Add(pt.hi, opt.hi);
+ }
+
+ if (iterations == max_iterations)
+ out_it[id] = 0;
+ else
+ out_it[id] = ((iterations & 0xFF) << 16) | ((iterations & 0x07) << 6);
+}
+